Ocean
Loading...
Searching...
No Matches
AdvancedFrameChannels.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_ADVANCED_ADVANCED_FRAME_CHANNLES_H
9#define META_OCEAN_CV_ADVANCED_ADVANCED_FRAME_CHANNLES_H
10
12#include "ocean/cv/NEON.h"
13
14#include "ocean/base/Frame.h"
15
16namespace Ocean
17{
18
19namespace CV
20{
21
22namespace Advanced
23{
24
25/**
26 * This class implements advanced frame channel conversion, transformation and extraction functions.
27 * @ingroup cvadvanced
28 */
29class OCEAN_CV_ADVANCED_EXPORT AdvancedFrameChannels
30{
31 public:
32
33 /**
34 * Definition of a constant to specify that the number of channels are not known at compile time but at runtime only.
35 */
36 static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME = 0u;
37
38 /**
39 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
40 * In addition to CV::FrameChannels::separateTo1Channel(), this function supports multiplication factors for source and target elements.<br>
41 * Usage:
42 * @code
43 * const unsigned int width = ...;
44 * const unsigned int height = ...;
45 *
46 * uint8_t* sourceFrame = ...;
47 * const unsigned int sourceFramePaddingElements = ...;
48 *
49 * constexpr unsigned int channels = 2u;
50 *
51 * const float* targetFrames[channels] = {..., ...};
52 * const unsigned int targetFramesPaddingElements[2] = {..., ...};
53 *
54 * constexpr uint8_t sourceFactor = 1u;
55 * constexpr uint8_t targetFactor = 1.0f / 255.0f;
56 *
57 * separateTo1Channel<uint8_t, float, channels>(sourceFrame, targetFrames, width, height, channels, sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements);
58 * @endcode
59 * @param sourceFrame The frame to be separated, must be valid
60 * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
61 * @param width The width of the source frame in pixel, with range [1, infinity)
62 * @param height The height of the source frame in pixel, with range [1, infinity)
63 * @param channels The number of channels the source frame has, with range [1, infinity)
64 * @param sourceFactor The multiplication factor each source element will be multiplied with, with range (-infinity, infinity)/{0}
65 * @param targetFactor The multiplication factor each target element will be multiplied with, with range (-infinity, infinity)/{0}
66 * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
67 * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity), nullptr if all are zero
68 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
69 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
70 * @tparam tChannels The number of source frames (and target channels) if known at compile time; otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME == 0, if know at compile time must be identical with 'channels'
71 */
72 template <typename TSource, typename TTarget, unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
73 static void separateTo1Channel(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements);
74
75 /**
76 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
77 * In addition to CV::FrameChannels::separateTo1Channel(), this function supports multiplication factors for source and target elements.<br>
78 * Usage:
79 * @code
80 * const unsigned int width = ...;
81 * const unsigned int height = ...;
82 *
83 * const uint8_t* sourceFrame = ...;
84 * const unsigned int sourceFramePaddingElements = ...;
85 *
86 * float* targetFrame0 = ...;
87 * float* targetFrame1 = ...;
88 * const unsigned int targetFramePaddingElements0 = ...;
89 * const unsigned int targetFramePaddingElements1 = ...;
90 *
91 * constexpr uint8_t sourceFactor = 1u;
92 * constexpr uint8_t targetFactor = 1.0f / 255.0f;
93 *
94 * separateTo1Channel<uint8_t, float>(sourceFrame, {targetFrame0, targetFrame1}, width, height, sourceFactor, targetFactor, sourceFramePaddingElements, {targetFramePaddingElements0, targetFramePaddingElements1});
95 * @endcode
96 * @param sourceFrame The frame to be separated, must be valid
97 * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
98 * @param width The width of the source frame in pixel, with range [1, infinity)
99 * @param height The height of the source frame in pixel, with range [1, infinity)
100 * @param sourceFactor The multiplication factor each source element will be multiplied with, with range (-infinity, infinity)/{0}
101 * @param targetFactor The multiplication factor each target element will be multiplied with, with range (-infinity, infinity)/{0}
102 * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
103 * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity)
104 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
105 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
106 */
107 template <typename TSource, typename TTarget>
108 static void separateTo1Channel(const TSource* const sourceFrame, const std::initializer_list<TTarget*>& targetFrames, const unsigned int width, const unsigned int height, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const std::initializer_list<const unsigned int>& targetFramesPaddingElements);
109
110 /**
111 * Zips/interleaves 1-channel images into one image with n-channels.
112 * In addition to CV::FrameChannels::targetFramePaddingElements(), this function supports multiplication factors for source and target elements.<br>
113 * Usage:
114 * @code
115 * const unsigned int width = ...;
116 * const unsigned int height = ...;
117 *
118 * const float* sourceFrames[2] = {..., ...};
119 * const unsigned int sourceFramesPaddingElements[2] = {..., ...};
120 *
121 * uint8_t* targetFrame = ...;
122 * const unsigned int targetFramePaddingElements = ...;
123 *
124 * constexpr float sourceFactor = 255.0f;
125 * constexpr uint8_t targetFactor = 1u;
126 *
127 * zipChannels<float, uint8_t>(sourceFrames, targetFrame, width, height, 2u, sourceFactor, targetFactor, sourceFramesPaddingElements, targetFramePaddingElements);
128 * @endcode
129 * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
130 * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
131 * @param width The width of the source frames in pixel, with range [1, infinity)
132 * @param height The height of the source frames in pixel, with range [1, infinity)
133 * @param channels The number of provided source frames (and the number of channels the target frame will have), with range [1, infinity)
134 * @param sourceFactor The multiplication factor each source element will be multiplied with, with range (-infinity, infinity)/{0}
135 * @param targetFactor The multiplication factor each target element will be multiplied with, with range (-infinity, infinity)/{0}
136 * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity), nullptr if all are zero
137 * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
138 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
139 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
140 * @tparam tChannels The number of source frames (and target channels) if known at compile time; otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME == 0, if know at compile time must be identical with 'channels'
141 */
142 template <typename TSource, typename TTarget, unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
143 static void zipChannels(const TSource* const* const sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
144
145 /**
146 * Zips/interleaves 1-channel images into one image with n-channels.
147 * In addition to CV::FrameChannels::targetFramePaddingElements(), this function supports multiplication factors for source and target elements.<br>
148 * Usage:
149 * @code
150 * const unsigned int width = ...;
151 * const unsigned int height = ...;
152 *
153 * const float* sourceFrame0 = ...;
154 * const float* sourceFrame1 = ...;
155 * const unsigned int sourceFramePaddingElements0 = ...;
156 * const unsigned int sourceFramePaddingElements1 = ...;
157 *
158 * uint8_t* targetFrame = ...;
159 * const unsigned int targetFramePaddingElements = ...;
160 *
161 * constexpr float sourceFactor = 255.0f;
162 * constexpr uint8_t targetFactor = 1u;
163 *
164 * zipChannels<float, uint8_t>({sourceFrame0, sourceFrame1}, targetFrame, width, height, sourceFactor, targetFactor, {sourceFramePaddingElements0, sourceFramePaddingElements1}, targetFramePaddingElements);
165 * @endcode
166 * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
167 * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
168 * @param width The width of the source frames in pixel, with range [1, infinity)
169 * @param height The height of the source frames in pixel, with range [1, infinity)
170 * @param sourceFactor The multiplication factor each source element will be multiplied with, with range (-infinity, infinity)/{0}
171 * @param targetFactor The multiplication factor each target element will be multiplied with, with range (-infinity, infinity)/{0}
172 * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity)
173 * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
174 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
175 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
176 */
177 template <typename TSource, typename TTarget>
178 static void zipChannels(const std::initializer_list<const TSource*>& sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const TSource sourceFactor, const TTarget targetFactor, const std::initializer_list<unsigned int>& sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
179
180 protected:
181
182 /**
183 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
184 * @param sourceFrame The frame to be separated, must be valid
185 * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
186 * @param width The width of the source frame in pixel, with range [1, infinity)
187 * @param height The height of the source frame in pixel, with range [1, infinity)
188 * @param channels The number of channels the source frame has, with range [1, infinity)
189 * @param sourceFactor The multiplication factor each source element will be multiplied with, with range (-infinity, infinity)/{0}
190 * @param targetFactor The multiplication factor each target element will be multiplied with, with range (-infinity, infinity)/{0}
191 * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
192 * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity)
193 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
194 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
195 */
196 template <typename TSource, typename TTarget>
197 static void separateTo1ChannelRuntime(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements);
198
199 /**
200 * Zips/interleaves 1-channel images into one image with n-channels.
201 * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
202 * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
203 * @param width The width of the source frames in pixel, with range [1, infinity)
204 * @param height The height of the source frames in pixel, with range [1, infinity)
205 * @param channels The number of provided source frames (and the number of channels the target frame will have), with range [1, infinity)
206 * @param sourceFactor The multiplication factor each source element will be multiplied with, with range (-infinity, infinity)/{0}
207 * @param targetFactor The multiplication factor each target element will be multiplied with, with range (-infinity, infinity)/{0}
208 * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity)
209 * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
210 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
211 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
212 */
213 template <typename TSource, typename TTarget>
214 static void zipChannelsRuntime(const TSource* const* const sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
215
216#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
217
218 /**
219 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
220 * @param sourceFrame The frame to be separated, must be valid
221 * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
222 * @param width The width of the source frame in pixel, with range [1, infinity)
223 * @param height The height of the source frame in pixel, with range [1, infinity)
224 * @param targetFactor The multiplication factor each target element will be multiplied with, with range (-infinity, infinity)/{0}
225 * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
226 * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity), nullptr if all are zero
227 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
228 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
229 * @tparam tChannels The number of source frames (and target channels) if known at compile time; otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME == 0, if know at compile time must be identical with 'channels'
230 */
231 template <typename TSource, typename TTarget, unsigned int tChannels>
232 static void separateTo1ChannelOnlyTargetFactorNEON(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements);
233
234 /**
235 * Zips/interleaves 1-channel images into one image with n-channels.
236 * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
237 * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
238 * @param width The width of the source frames in pixel, with range [1, infinity)
239 * @param height The height of the source frames in pixel, with range [1, infinity)
240 * @param sourceFactor The multiplication factor each source element will be multiplied with, with range (-infinity, infinity)/{0}
241 * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity)
242 * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
243 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
244 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
245 * @tparam tChannels The number of source frames (and target channels) if known at compile time; otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME == 0, if know at compile time must be identical with 'channels'
246 */
247 template <typename TSource, typename TTarget, unsigned int tChannels>
248 static void zipChannelsOnlySourceFactorNEON(const TSource* const* const sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const TSource sourceFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
249
250#endif // OCEAN_HARDWARE_NEON_VERSION
251};
252
253template <typename TSource, typename TTarget, unsigned int tChannels>
254void AdvancedFrameChannels::separateTo1Channel(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
255{
256 ocean_assert(sourceFrame != nullptr);
257 ocean_assert(targetFrames != nullptr);
258
259 ocean_assert(width != 0u && height != 0u);
260
261 assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
262 assert(sourceFactor != TSource(1) || targetFactor != TTarget(1)); // Identity factors, use FrameChannels::separateTo1Channel() instead!
263
264 ocean_assert(tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME || tChannels == channels);
265
266 if constexpr (tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME)
267 {
268 separateTo1ChannelRuntime<TSource, TTarget>(sourceFrame, targetFrames, width, height, channels, sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements);
269 return;
270 }
271
272#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
273
274 if constexpr (std::is_same<TSource, uint8_t>::value && std::is_same<TTarget, float>::value && tChannels >= 2u && tChannels <= 4u)
275 {
276 if (sourceFactor == uint8_t(1))
277 {
278 separateTo1ChannelOnlyTargetFactorNEON<TSource, TTarget, tChannels>(sourceFrame, targetFrames, width, height, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements);
279
280 return;
281 }
282 }
283
284#endif // OCEAN_HARDWARE_NEON_VERSION
285
286#ifdef OCEAN_DEBUG
287 for (unsigned int c = 0u; c < tChannels; ++c)
288 {
289 ocean_assert(targetFrames[c] != nullptr);
290 }
291#endif
292
293 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements == nullptr)
294 {
295 for (unsigned int n = 0u; n < width * height; ++n)
296 {
297 for (unsigned int c = 0u; c < tChannels; ++c)
298 {
299 targetFrames[c][n] = TTarget(sourceFrame[n * tChannels + c] * sourceFactor) * targetFactor;
300 }
301 }
302 }
303 else if (targetFramesPaddingElements == nullptr)
304 {
305 ocean_assert(sourceFramePaddingElements != 0u);
306
307 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
308
309 for (unsigned int y = 0u; y < height; ++y)
310 {
311 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
312
313 const unsigned int targetRowOffset = y * width;
314
315 for (unsigned int x = 0u; x < width; ++x)
316 {
317 for (unsigned int c = 0u; c < tChannels; ++c)
318 {
319 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * tChannels + c) * sourceFactor) * targetFactor;
320 }
321 }
322 }
323 }
324 else
325 {
326 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
327
328 Indices32 targetFrameStrideElements(tChannels);
329
330 for (unsigned int c = 0u; c < tChannels; ++c)
331 {
332 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
333 }
334
335 for (unsigned int y = 0u; y < height; ++y)
336 {
337 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
338
339 for (unsigned int x = 0u; x < width; ++x)
340 {
341 for (unsigned int c = 0u; c < tChannels; ++c)
342 {
343 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * tChannels + c) * sourceFactor) * targetFactor;
344 }
345 }
346 }
347 }
348}
349
350template <typename TSource, typename TTarget>
351void AdvancedFrameChannels::separateTo1Channel(const TSource* const sourceFrame, const std::initializer_list<TTarget*>& targetFrames, const unsigned int width, const unsigned int height, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const std::initializer_list<const unsigned int>& targetFramesPaddingElements)
352{
353 ocean_assert(targetFrames.size() >= 1);
354 ocean_assert(targetFramesPaddingElements.size() == 0 || targetFrames.size() == targetFramesPaddingElements.size());
355
356 assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
357 assert(sourceFactor != TSource(1) || targetFactor != TTarget(1)); // Identity factors, use FrameChannels::separateTo1Channel() instead!
358
359 if (targetFrames.size() == 2)
360 {
361 separateTo1Channel<TSource, TTarget, 2u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
362 }
363 else if (targetFrames.size() == 3)
364 {
365 separateTo1Channel<TSource, TTarget, 3u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
366 }
367 else if (targetFrames.size() == 4)
368 {
369 separateTo1Channel<TSource, TTarget, 4u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
370 }
371 else
372 {
373 separateTo1Channel<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
374 }
375}
376
377template <typename TSource, typename TTarget, unsigned int tChannels>
378void AdvancedFrameChannels::zipChannels(const TSource* const* sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
379{
380 ocean_assert(sourceFrames != nullptr);
381 ocean_assert(targetFrame != nullptr);
382
383 ocean_assert(width != 0u && height != 0u);
384
385 assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
386 assert(sourceFactor != TSource(1) || targetFactor != TTarget(1)); // Identity factors, use FrameChannels::separateTo1Channel() instead!
387
388 ocean_assert(tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME || tChannels == channels);
389
390 if constexpr (tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME)
391 {
392 zipChannelsRuntime<TSource, TTarget>(sourceFrames, targetFrame, width, height, channels, sourceFactor, targetFactor, sourceFramesPaddingElements, targetFramePaddingElements);
393 return;
394 }
395
396#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
397
398 if constexpr (std::is_same<TSource, float>::value && std::is_same<TTarget, uint8_t>::value && tChannels >= 2u && tChannels <= 4u)
399 {
400 if (targetFactor == uint8_t(1))
401 {
402 zipChannelsOnlySourceFactorNEON<TSource, TTarget, tChannels>(sourceFrames, targetFrame, width, height, sourceFactor, sourceFramesPaddingElements, targetFramePaddingElements);
403
404 return;
405 }
406 }
407
408#endif // OCEAN_HARDWARE_NEON_VERSION
409
410 bool allSourceFramesContinuous = true;
411
412 if (sourceFramesPaddingElements != nullptr)
413 {
414 for (unsigned int n = 0u; n < tChannels; ++n)
415 {
416 if (sourceFramesPaddingElements[n] != 0u)
417 {
418 allSourceFramesContinuous = false;
419 break;
420 }
421 }
422 }
423
424 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
425 {
426 for (unsigned int n = 0u; n < width * height; ++n)
427 {
428 for (unsigned int c = 0u; c < tChannels; ++c)
429 {
430 targetFrame[n * tChannels + c] = TTarget(sourceFrames[c][n] * sourceFactor) * targetFactor;
431 }
432 }
433 }
434 else
435 {
436 const unsigned int targetFrameStrideElements = width * tChannels + targetFramePaddingElements;
437
438 Indices32 sourceFrameStrideElements(tChannels);
439
440 for (unsigned int c = 0u; c < tChannels; ++c)
441 {
442 if (sourceFramesPaddingElements == nullptr)
443 {
444 sourceFrameStrideElements[c] = width;
445 }
446 else
447 {
448 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
449 }
450 }
451
452 for (unsigned int y = 0u; y < height; ++y)
453 {
454 TTarget* const targetRow = targetFrame + y * targetFrameStrideElements;
455
456 for (unsigned int x = 0u; x < width; ++x)
457 {
458 for (unsigned int c = 0u; c < tChannels; ++c)
459 {
460 *(targetRow + x * tChannels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x) * sourceFactor) * targetFactor;
461 }
462 }
463 }
464 }
465}
466
467template <typename TSource, typename TTarget>
468void AdvancedFrameChannels::zipChannels(const std::initializer_list<const TSource*>& sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const TSource sourceFactor, const TTarget targetFactor, const std::initializer_list<unsigned int>& sourceFramePaddingElements, const unsigned int targetFramePaddingElements)
469{
470 ocean_assert(sourceFrames.size() >= 1);
471 ocean_assert(sourceFramePaddingElements.size() == 0 || sourceFrames.size() == sourceFramePaddingElements.size());
472
473 assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
474 assert(sourceFactor != TSource(1) || targetFactor != TTarget(1)); // Identity factors, use FrameChannels::separateTo1Channel() instead!
475
476 if (sourceFrames.size() == 2)
477 {
478 zipChannels<TSource, TTarget, 2u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
479 }
480 else if (sourceFrames.size() == 3)
481 {
482 zipChannels<TSource, TTarget, 3u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
483 }
484 else if (sourceFrames.size() == 4)
485 {
486 zipChannels<TSource, TTarget, 4u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
487 }
488 else
489 {
490 zipChannels<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
491 }
492}
493
494template <typename TSource, typename TTarget>
495void AdvancedFrameChannels::separateTo1ChannelRuntime(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
496{
497 ocean_assert(sourceFrame != nullptr);
498 ocean_assert(targetFrames != nullptr);
499
500 ocean_assert(width != 0u && height != 0u);
501 ocean_assert(channels != 0u);
502
503 assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
504 assert(sourceFactor != TSource(1) || targetFactor != TTarget(1)); // Identity factors, use FrameChannels::separateTo1Channel() instead!
505
506#ifdef OCEAN_DEBUG
507 for (unsigned int c = 0u; c < channels; ++c)
508 {
509 ocean_assert(targetFrames[c] != nullptr);
510 }
511#endif
512
513 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements == nullptr)
514 {
515 for (unsigned int n = 0u; n < width * height; ++n)
516 {
517 for (unsigned int c = 0u; c < channels; ++c)
518 {
519 targetFrames[c][n] = TTarget(sourceFrame[n * channels + c] * sourceFactor) * targetFactor;
520 }
521 }
522 }
523 else if (targetFramesPaddingElements == nullptr)
524 {
525 ocean_assert(sourceFramePaddingElements != 0u);
526
527 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
528
529 for (unsigned int y = 0u; y < height; ++y)
530 {
531 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
532
533 const unsigned int targetRowOffset = y * width;
534
535 for (unsigned int x = 0u; x < width; ++x)
536 {
537 for (unsigned int c = 0u; c < channels; ++c)
538 {
539 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * channels + c) * sourceFactor) * targetFactor;
540 }
541 }
542 }
543 }
544 else
545 {
546 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
547
548 Indices32 targetFrameStrideElements(channels);
549
550 for (unsigned int c = 0u; c < channels; ++c)
551 {
552 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
553 }
554
555 for (unsigned int y = 0u; y < height; ++y)
556 {
557 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
558
559 for (unsigned int x = 0u; x < width; ++x)
560 {
561 for (unsigned int c = 0u; c < channels; ++c)
562 {
563 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * channels + c) * sourceFactor) * targetFactor;
564 }
565 }
566 }
567 }
568}
569
570template <typename TSource, typename TTarget>
571void AdvancedFrameChannels::zipChannelsRuntime(const TSource* const* sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
572{
573 ocean_assert(sourceFrames != nullptr);
574 ocean_assert(targetFrame != nullptr);
575
576 ocean_assert(width != 0u && height != 0u);
577 ocean_assert(channels != 0u);
578
579 assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
580 assert(sourceFactor != TSource(1) || targetFactor != TTarget(1)); // Identity factors, use FrameChannels::separateTo1Channel() instead!
581
582 bool allSourceFramesContinuous = true;
583
584 if (sourceFramesPaddingElements != nullptr)
585 {
586 for (unsigned int n = 0u; n < channels; ++n)
587 {
588 if (sourceFramesPaddingElements[n] != 0u)
589 {
590 allSourceFramesContinuous = false;
591 break;
592 }
593 }
594 }
595
596 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
597 {
598 for (unsigned int n = 0u; n < width * height; ++n)
599 {
600 for (unsigned int c = 0u; c < channels; ++c)
601 {
602 targetFrame[n * channels + c] = TTarget(sourceFrames[c][n] * sourceFactor) * targetFactor;
603 }
604 }
605 }
606 else
607 {
608 const unsigned int targetFrameStrideElements = width * channels + targetFramePaddingElements;
609
610 Indices32 sourceFrameStrideElements(channels);
611
612 for (unsigned int c = 0u; c < channels; ++c)
613 {
614 if (sourceFramesPaddingElements == nullptr)
615 {
616 sourceFrameStrideElements[c] = width;
617 }
618 else
619 {
620 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
621 }
622 }
623
624 for (unsigned int y = 0u; y < height; ++y)
625 {
626 TTarget* const targetRow = targetFrame + y * targetFrameStrideElements;
627
628 for (unsigned int x = 0u; x < width; ++x)
629 {
630 for (unsigned int c = 0u; c < channels; ++c)
631 {
632 *(targetRow + x * channels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x) * sourceFactor) * targetFactor;
633 }
634 }
635 }
636 }
637}
638
639#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
640
641template <>
642inline void AdvancedFrameChannels::separateTo1ChannelOnlyTargetFactorNEON<uint8_t, float, 2u>(const uint8_t* const sourceFrame, float* const* const targetFrames, const unsigned int width, const unsigned int height, const float targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
643{
644 ocean_assert(sourceFrame != nullptr);
645 ocean_assert(targetFrames != nullptr);
646
647 ocean_assert(width != 0u && height != 0u);
648
649 ocean_assert(targetFactor != 0.0f);
650
651 constexpr unsigned int tChannels = 2u;
652
653 bool allTargetFramesContinuous = true;
654
655 if (targetFramesPaddingElements != nullptr)
656 {
657 for (unsigned int n = 0u; n < tChannels; ++n)
658 {
659 if (targetFramesPaddingElements[n] != 0u)
660 {
661 allTargetFramesContinuous = false;
662 break;
663 }
664 }
665 }
666
667 const float32x4_t targetFactor_32x4 = vdupq_n_f32(targetFactor);
668
669 const uint8_t* source = sourceFrame;
670 float* target0 = targetFrames[0];
671 float* target1 = targetFrames[1];
672
673 constexpr unsigned int tBlockSize = 16u;
674
675 uint8x16x2_t source_u_8x16x2;
676
677 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
678 {
679 const unsigned int pixels = width * height;
680 const unsigned int blocks = pixels / tBlockSize;
681 const unsigned int remaining = pixels % tBlockSize;
682
683 for (unsigned int n = 0u; n < blocks; ++n)
684 {
685 source_u_8x16x2 = vld2q_u8(source);
686
687 const float32x4x4_t sourceA_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x2.val[0]);
688 const float32x4x4_t sourceB_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x2.val[1]);
689
690 vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
691 vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
692 vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
693 vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
694
695 vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
696 vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
697 vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
698 vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
699
700 source += tBlockSize * tChannels;
701
702 target0 += tBlockSize;
703 target1 += tBlockSize;
704 }
705
706 for (unsigned int n = 0u; n < remaining; ++n)
707 {
708 target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
709 target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
710 }
711 }
712 else
713 {
714 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
715 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
716
717 const unsigned int blocks = width / tBlockSize;
718 const unsigned int remaining = width % tBlockSize;
719
720 for (unsigned int y = 0u; y < height; ++y)
721 {
722 for (unsigned int n = 0u; n < blocks; ++n)
723 {
724 source_u_8x16x2 = vld2q_u8(source);
725
726 const float32x4x4_t sourceA_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x2.val[0]);
727 const float32x4x4_t sourceB_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x2.val[1]);
728
729 vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
730 vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
731 vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
732 vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
733
734 vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
735 vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
736 vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
737 vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
738
739 source += tBlockSize * tChannels;
740
741 target0 += tBlockSize;
742 target1 += tBlockSize;
743 }
744
745 for (unsigned int n = 0u; n < remaining; ++n)
746 {
747 target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
748 target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
749 }
750
751 source += remaining * tChannels + sourceFramePaddingElements;
752 target0 += remaining + targetFrame0PaddingElements;
753 target1 += remaining + targetFrame1PaddingElements;
754 }
755 }
756}
757
758template <>
759inline void AdvancedFrameChannels::separateTo1ChannelOnlyTargetFactorNEON<uint8_t, float, 3u>(const uint8_t* const sourceFrame, float* const* const targetFrames, const unsigned int width, const unsigned int height, const float targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
760{
761 ocean_assert(sourceFrame != nullptr);
762 ocean_assert(targetFrames != nullptr);
763
764 ocean_assert(width != 0u && height != 0u);
765
766 constexpr unsigned int tChannels = 3u;
767
768 bool allTargetFramesContinuous = true;
769
770 if (targetFramesPaddingElements != nullptr)
771 {
772 for (unsigned int n = 0u; n < tChannels; ++n)
773 {
774 if (targetFramesPaddingElements[n] != 0u)
775 {
776 allTargetFramesContinuous = false;
777 break;
778 }
779 }
780 }
781
782 const float32x4_t targetFactor_32x4 = vdupq_n_f32(targetFactor);
783
784 const uint8_t* source = sourceFrame;
785 float* target0 = targetFrames[0];
786 float* target1 = targetFrames[1];
787 float* target2 = targetFrames[2];
788
789 constexpr unsigned int tBlockSize = 16u;
790
791 uint8x16x3_t source_u_8x16x3;
792
793 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
794 {
795 const unsigned int pixels = width * height;
796 const unsigned int blocks = pixels / tBlockSize;
797 const unsigned int remaining = pixels % tBlockSize;
798
799 for (unsigned int n = 0u; n < blocks; ++n)
800 {
801 source_u_8x16x3 = vld3q_u8(source);
802
803 const float32x4x4_t sourceA_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x3.val[0]);
804 const float32x4x4_t sourceB_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x3.val[1]);
805 const float32x4x4_t sourceC_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x3.val[2]);
806
807 vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
808 vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
809 vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
810 vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
811
812 vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
813 vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
814 vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
815 vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
816
817 vst1q_f32(target2 + 0, vmulq_f32(sourceC_f_32x4x4.val[0], targetFactor_32x4));
818 vst1q_f32(target2 + 4, vmulq_f32(sourceC_f_32x4x4.val[1], targetFactor_32x4));
819 vst1q_f32(target2 + 8, vmulq_f32(sourceC_f_32x4x4.val[2], targetFactor_32x4));
820 vst1q_f32(target2 + 12, vmulq_f32(sourceC_f_32x4x4.val[3], targetFactor_32x4));
821
822 source += tBlockSize * tChannels;
823
824 target0 += tBlockSize;
825 target1 += tBlockSize;
826 target2 += tBlockSize;
827 }
828
829 for (unsigned int n = 0u; n < remaining; ++n)
830 {
831 target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
832 target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
833 target2[n] = float(source[n * tChannels + 2u]) * targetFactor;
834 }
835 }
836 else
837 {
838 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
839 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
840 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[2];
841
842 const unsigned int blocks = width / tBlockSize;
843 const unsigned int remaining = width % tBlockSize;
844
845 for (unsigned int y = 0u; y < height; ++y)
846 {
847 for (unsigned int n = 0u; n < blocks; ++n)
848 {
849 source_u_8x16x3 = vld3q_u8(source);
850
851 const float32x4x4_t sourceA_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x3.val[0]);
852 const float32x4x4_t sourceB_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x3.val[1]);
853 const float32x4x4_t sourceC_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x3.val[2]);
854
855 vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
856 vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
857 vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
858 vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
859
860 vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
861 vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
862 vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
863 vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
864
865 vst1q_f32(target2 + 0, vmulq_f32(sourceC_f_32x4x4.val[0], targetFactor_32x4));
866 vst1q_f32(target2 + 4, vmulq_f32(sourceC_f_32x4x4.val[1], targetFactor_32x4));
867 vst1q_f32(target2 + 8, vmulq_f32(sourceC_f_32x4x4.val[2], targetFactor_32x4));
868 vst1q_f32(target2 + 12, vmulq_f32(sourceC_f_32x4x4.val[3], targetFactor_32x4));
869
870 source += tBlockSize * tChannels;
871
872 target0 += tBlockSize;
873 target1 += tBlockSize;
874 target2 += tBlockSize;
875 }
876
877 for (unsigned int n = 0u; n < remaining; ++n)
878 {
879 target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
880 target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
881 target2[n] = float(source[n * tChannels + 2u]) * targetFactor;
882 }
883
884 source += remaining * tChannels + sourceFramePaddingElements;
885 target0 += remaining + targetFrame0PaddingElements;
886 target1 += remaining + targetFrame1PaddingElements;
887 target2 += remaining + targetFrame2PaddingElements;
888 }
889 }
890}
891
892template <>
893inline void AdvancedFrameChannels::separateTo1ChannelOnlyTargetFactorNEON<uint8_t, float, 4u>(const uint8_t* const sourceFrame, float* const* const targetFrames, const unsigned int width, const unsigned int height, const float targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
894{
895 ocean_assert(sourceFrame != nullptr);
896 ocean_assert(targetFrames != nullptr);
897
898 ocean_assert(width != 0u && height != 0u);
899
900 constexpr unsigned int tChannels = 4u;
901
902 bool allTargetFramesContinuous = true;
903
904 if (targetFramesPaddingElements != nullptr)
905 {
906 for (unsigned int n = 0u; n < tChannels; ++n)
907 {
908 if (targetFramesPaddingElements[n] != 0u)
909 {
910 allTargetFramesContinuous = false;
911 break;
912 }
913 }
914 }
915
916 const float32x4_t targetFactor_32x4 = vdupq_n_f32(targetFactor);
917
918 const uint8_t* source = sourceFrame;
919 float* target0 = targetFrames[0];
920 float* target1 = targetFrames[1];
921 float* target2 = targetFrames[2];
922 float* target3 = targetFrames[3];
923
924 constexpr unsigned int tBlockSize = 16u;
925
926 uint8x16x4_t source_u_8x16x4;
927
928 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
929 {
930 const unsigned int pixels = width * height;
931 const unsigned int blocks = pixels / tBlockSize;
932 const unsigned int remaining = pixels % tBlockSize;
933
934 for (unsigned int n = 0u; n < blocks; ++n)
935 {
936 source_u_8x16x4 = vld4q_u8(source);
937
938 const float32x4x4_t sourceA_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[0]);
939 const float32x4x4_t sourceB_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[1]);
940 const float32x4x4_t sourceC_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[2]);
941 const float32x4x4_t sourceD_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[3]);
942
943 vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
944 vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
945 vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
946 vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
947
948 vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
949 vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
950 vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
951 vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
952
953 vst1q_f32(target2 + 0, vmulq_f32(sourceC_f_32x4x4.val[0], targetFactor_32x4));
954 vst1q_f32(target2 + 4, vmulq_f32(sourceC_f_32x4x4.val[1], targetFactor_32x4));
955 vst1q_f32(target2 + 8, vmulq_f32(sourceC_f_32x4x4.val[2], targetFactor_32x4));
956 vst1q_f32(target2 + 12, vmulq_f32(sourceC_f_32x4x4.val[3], targetFactor_32x4));
957
958 vst1q_f32(target3 + 0, vmulq_f32(sourceD_f_32x4x4.val[0], targetFactor_32x4));
959 vst1q_f32(target3 + 4, vmulq_f32(sourceD_f_32x4x4.val[1], targetFactor_32x4));
960 vst1q_f32(target3 + 8, vmulq_f32(sourceD_f_32x4x4.val[2], targetFactor_32x4));
961 vst1q_f32(target3 + 12, vmulq_f32(sourceD_f_32x4x4.val[3], targetFactor_32x4));
962
963 source += tBlockSize * tChannels;
964
965 target0 += tBlockSize;
966 target1 += tBlockSize;
967 target2 += tBlockSize;
968 target3 += tBlockSize;
969 }
970
971 for (unsigned int n = 0u; n < remaining; ++n)
972 {
973 target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
974 target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
975 target2[n] = float(source[n * tChannels + 2u]) * targetFactor;
976 target3[n] = float(source[n * tChannels + 3u]) * targetFactor;
977 }
978 }
979 else
980 {
981 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
982 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
983 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[2];
984 const unsigned int targetFrame3PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[3];
985
986 const unsigned int blocks = width / tBlockSize;
987 const unsigned int remaining = width % tBlockSize;
988
989 for (unsigned int y = 0u; y < height; ++y)
990 {
991 for (unsigned int n = 0u; n < blocks; ++n)
992 {
993 source_u_8x16x4 = vld4q_u8(source);
994
995 const float32x4x4_t sourceA_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[0]);
996 const float32x4x4_t sourceB_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[1]);
997 const float32x4x4_t sourceC_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[2]);
998 const float32x4x4_t sourceD_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[3]);
999
1000 vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
1001 vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
1002 vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
1003 vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
1004
1005 vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
1006 vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
1007 vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
1008 vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
1009
1010 vst1q_f32(target2 + 0, vmulq_f32(sourceC_f_32x4x4.val[0], targetFactor_32x4));
1011 vst1q_f32(target2 + 4, vmulq_f32(sourceC_f_32x4x4.val[1], targetFactor_32x4));
1012 vst1q_f32(target2 + 8, vmulq_f32(sourceC_f_32x4x4.val[2], targetFactor_32x4));
1013 vst1q_f32(target2 + 12, vmulq_f32(sourceC_f_32x4x4.val[3], targetFactor_32x4));
1014
1015 vst1q_f32(target3 + 0, vmulq_f32(sourceD_f_32x4x4.val[0], targetFactor_32x4));
1016 vst1q_f32(target3 + 4, vmulq_f32(sourceD_f_32x4x4.val[1], targetFactor_32x4));
1017 vst1q_f32(target3 + 8, vmulq_f32(sourceD_f_32x4x4.val[2], targetFactor_32x4));
1018 vst1q_f32(target3 + 12, vmulq_f32(sourceD_f_32x4x4.val[3], targetFactor_32x4));
1019
1020 source += tBlockSize * tChannels;
1021
1022 target0 += tBlockSize;
1023 target1 += tBlockSize;
1024 target2 += tBlockSize;
1025 target3 += tBlockSize;
1026 }
1027
1028 for (unsigned int n = 0u; n < remaining; ++n)
1029 {
1030 target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
1031 target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
1032 target2[n] = float(source[n * tChannels + 2u]) * targetFactor;
1033 target3[n] = float(source[n * tChannels + 3u]) * targetFactor;
1034 }
1035
1036 source += remaining * tChannels + sourceFramePaddingElements;
1037 target0 += remaining + targetFrame0PaddingElements;
1038 target1 += remaining + targetFrame1PaddingElements;
1039 target2 += remaining + targetFrame2PaddingElements;
1040 target3 += remaining + targetFrame3PaddingElements;
1041 }
1042 }
1043}
1044
1045template <>
1046void AdvancedFrameChannels::zipChannelsOnlySourceFactorNEON<float, uint8_t, 2u>(const float* const* const sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const float sourceFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
1047{
1048 ocean_assert(sourceFrames != nullptr);
1049 ocean_assert(targetFrame != nullptr);
1050
1051 ocean_assert(width != 0u && height != 0u);
1052
1053 constexpr unsigned int tChannels = 2u;
1054
1055 bool allSourceFramesContinuous = true;
1056
1057 if (sourceFramesPaddingElements != nullptr)
1058 {
1059 for (unsigned int n = 0u; n < tChannels; ++n)
1060 {
1061 if (sourceFramesPaddingElements[n] != 0u)
1062 {
1063 allSourceFramesContinuous = false;
1064 break;
1065 }
1066 }
1067 }
1068
1069 const float32x4_t sourceFactor_32x4 = vdupq_n_f32(sourceFactor);
1070
1071 const float* source0 = sourceFrames[0];
1072 const float* source1 = sourceFrames[1];
1073 uint8_t* target = targetFrame;
1074
1075 constexpr unsigned int tBlockSize = 16u;
1076
1077 uint8x16x2_t target_8x16x2;
1078
1079 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1080 {
1081 const unsigned int pixels = width * height;
1082 const unsigned int blocks = pixels / tBlockSize;
1083 const unsigned int remaining = pixels % tBlockSize;
1084
1085 for (unsigned int n = 0u; n < blocks; ++n)
1086 {
1087 const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1088 const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1089 const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1090 const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1091
1092 const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1093 const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1094 const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1095 const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1096
1097 target_8x16x2.val[0] = NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1098 target_8x16x2.val[1] = NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1099
1100 vst2q_u8(target, target_8x16x2);
1101
1102 source0 += tBlockSize;
1103 source1 += tBlockSize;
1104
1105 target += tBlockSize * tChannels;
1106 }
1107
1108 for (unsigned int n = 0u; n < remaining; ++n)
1109 {
1110 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1111 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1112
1113 target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1114 target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1115 }
1116 }
1117 else
1118 {
1119 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
1120 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
1121
1122 const unsigned int blocks = width / tBlockSize;
1123 const unsigned int remaining = width % tBlockSize;
1124
1125 for (unsigned int y = 0u; y < height; ++y)
1126 {
1127 for (unsigned int n = 0u; n < blocks; ++n)
1128 {
1129 const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1130 const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1131 const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1132 const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1133
1134 const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1135 const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1136 const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1137 const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1138
1139 target_8x16x2.val[0] = NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1140 target_8x16x2.val[1] = NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1141
1142 vst2q_u8(target, target_8x16x2);
1143
1144 source0 += tBlockSize;
1145 source1 += tBlockSize;
1146
1147 target += tBlockSize * tChannels;
1148 }
1149
1150 for (unsigned int n = 0u; n < remaining; ++n)
1151 {
1152 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1153 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1154
1155 target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1156 target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1157 }
1158
1159 source0 += remaining + sourceFrame0PaddingElements;
1160 source1 += remaining + sourceFrame1PaddingElements;
1161 target += remaining * tChannels + targetFramePaddingElements;
1162 }
1163 }
1164}
1165
1166template <>
1167void AdvancedFrameChannels::zipChannelsOnlySourceFactorNEON<float, uint8_t, 3u>(const float* const* const sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const float sourceFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
1168{
1169 ocean_assert(sourceFrames != nullptr);
1170 ocean_assert(targetFrame != nullptr);
1171
1172 ocean_assert(width != 0u && height != 0u);
1173
1174 constexpr unsigned int tChannels = 3u;
1175
1176 bool allSourceFramesContinuous = true;
1177
1178 if (sourceFramesPaddingElements != nullptr)
1179 {
1180 for (unsigned int n = 0u; n < tChannels; ++n)
1181 {
1182 if (sourceFramesPaddingElements[n] != 0u)
1183 {
1184 allSourceFramesContinuous = false;
1185 break;
1186 }
1187 }
1188 }
1189
1190 const float32x4_t sourceFactor_32x4 = vdupq_n_f32(sourceFactor);
1191
1192 const float* source0 = sourceFrames[0];
1193 const float* source1 = sourceFrames[1];
1194 const float* source2 = sourceFrames[2];
1195 uint8_t* target = targetFrame;
1196
1197 constexpr unsigned int tBlockSize = 16u;
1198
1199 uint8x16x3_t target_8x16x3;
1200
1201 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1202 {
1203 const unsigned int pixels = width * height;
1204 const unsigned int blocks = pixels / tBlockSize;
1205 const unsigned int remaining = pixels % tBlockSize;
1206
1207 for (unsigned int n = 0u; n < blocks; ++n)
1208 {
1209 const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1210 const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1211 const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1212 const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1213
1214 const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1215 const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1216 const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1217 const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1218
1219 const float32x4_t source2_A_f_32x4 = vmulq_f32(vld1q_f32(source2 + 0), sourceFactor_32x4);
1220 const float32x4_t source2_B_f_32x4 = vmulq_f32(vld1q_f32(source2 + 4), sourceFactor_32x4);
1221 const float32x4_t source2_C_f_32x4 = vmulq_f32(vld1q_f32(source2 + 8), sourceFactor_32x4);
1222 const float32x4_t source2_D_f_32x4 = vmulq_f32(vld1q_f32(source2 + 12), sourceFactor_32x4);
1223
1224 target_8x16x3.val[0] = NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1225 target_8x16x3.val[1] = NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1226 target_8x16x3.val[2] = NEON::cast16ElementsNEON(source2_A_f_32x4, source2_B_f_32x4, source2_C_f_32x4, source2_D_f_32x4);
1227
1228 vst3q_u8(target, target_8x16x3);
1229
1230 source0 += tBlockSize;
1231 source1 += tBlockSize;
1232 source2 += tBlockSize;
1233
1234 target += tBlockSize * tChannels;
1235 }
1236
1237 for (unsigned int n = 0u; n < remaining; ++n)
1238 {
1239 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1240 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1241 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
1242
1243 target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1244 target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1245 target[n * tChannels + 2u] = uint8_t(source2[n] * sourceFactor);
1246 }
1247 }
1248 else
1249 {
1250 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
1251 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
1252 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
1253
1254 const unsigned int blocks = width / tBlockSize;
1255 const unsigned int remaining = width % tBlockSize;
1256
1257 for (unsigned int y = 0u; y < height; ++y)
1258 {
1259 for (unsigned int n = 0u; n < blocks; ++n)
1260 {
1261 const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1262 const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1263 const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1264 const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1265
1266 const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1267 const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1268 const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1269 const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1270
1271 const float32x4_t source2_A_f_32x4 = vmulq_f32(vld1q_f32(source2 + 0), sourceFactor_32x4);
1272 const float32x4_t source2_B_f_32x4 = vmulq_f32(vld1q_f32(source2 + 4), sourceFactor_32x4);
1273 const float32x4_t source2_C_f_32x4 = vmulq_f32(vld1q_f32(source2 + 8), sourceFactor_32x4);
1274 const float32x4_t source2_D_f_32x4 = vmulq_f32(vld1q_f32(source2 + 12), sourceFactor_32x4);
1275
1276 target_8x16x3.val[0] = NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1277 target_8x16x3.val[1] = NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1278 target_8x16x3.val[2] = NEON::cast16ElementsNEON(source2_A_f_32x4, source2_B_f_32x4, source2_C_f_32x4, source2_D_f_32x4);
1279
1280 vst3q_u8(target, target_8x16x3);
1281
1282 source0 += tBlockSize;
1283 source1 += tBlockSize;
1284 source2 += tBlockSize;
1285
1286 target += tBlockSize * tChannels;
1287 }
1288
1289 for (unsigned int n = 0u; n < remaining; ++n)
1290 {
1291 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1292 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1293 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
1294
1295 target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1296 target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1297 target[n * tChannels + 2u] = uint8_t(source2[n] * sourceFactor);
1298 }
1299
1300 source0 += remaining + sourceFrame0PaddingElements;
1301 source1 += remaining + sourceFrame1PaddingElements;
1302 source2 += remaining + sourceFrame2PaddingElements;
1303 target += remaining * tChannels + targetFramePaddingElements;
1304 }
1305 }
1306}
1307
1308template <>
1309void AdvancedFrameChannels::zipChannelsOnlySourceFactorNEON<float, uint8_t, 4u>(const float* const* const sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const float sourceFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
1310{
1311 ocean_assert(sourceFrames != nullptr);
1312 ocean_assert(targetFrame != nullptr);
1313
1314 ocean_assert(width != 0u && height != 0u);
1315
1316 constexpr unsigned int tChannels = 4u;
1317
1318 bool allSourceFramesContinuous = true;
1319
1320 if (sourceFramesPaddingElements != nullptr)
1321 {
1322 for (unsigned int n = 0u; n < tChannels; ++n)
1323 {
1324 if (sourceFramesPaddingElements[n] != 0u)
1325 {
1326 allSourceFramesContinuous = false;
1327 break;
1328 }
1329 }
1330 }
1331
1332 const float32x4_t sourceFactor_32x4 = vdupq_n_f32(sourceFactor);
1333
1334 const float* source0 = sourceFrames[0];
1335 const float* source1 = sourceFrames[1];
1336 const float* source2 = sourceFrames[2];
1337 const float* source3 = sourceFrames[3];
1338 uint8_t* target = targetFrame;
1339
1340 constexpr unsigned int tBlockSize = 16u;
1341
1342 uint8x16x4_t target_8x16x4;
1343
1344 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1345 {
1346 const unsigned int pixels = width * height;
1347 const unsigned int blocks = pixels / tBlockSize;
1348 const unsigned int remaining = pixels % tBlockSize;
1349
1350 for (unsigned int n = 0u; n < blocks; ++n)
1351 {
1352 const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1353 const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1354 const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1355 const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1356
1357 const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1358 const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1359 const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1360 const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1361
1362 const float32x4_t source2_A_f_32x4 = vmulq_f32(vld1q_f32(source2 + 0), sourceFactor_32x4);
1363 const float32x4_t source2_B_f_32x4 = vmulq_f32(vld1q_f32(source2 + 4), sourceFactor_32x4);
1364 const float32x4_t source2_C_f_32x4 = vmulq_f32(vld1q_f32(source2 + 8), sourceFactor_32x4);
1365 const float32x4_t source2_D_f_32x4 = vmulq_f32(vld1q_f32(source2 + 12), sourceFactor_32x4);
1366
1367 const float32x4_t source3_A_f_32x4 = vmulq_f32(vld1q_f32(source3 + 0), sourceFactor_32x4);
1368 const float32x4_t source3_B_f_32x4 = vmulq_f32(vld1q_f32(source3 + 4), sourceFactor_32x4);
1369 const float32x4_t source3_C_f_32x4 = vmulq_f32(vld1q_f32(source3 + 8), sourceFactor_32x4);
1370 const float32x4_t source3_D_f_32x4 = vmulq_f32(vld1q_f32(source3 + 12), sourceFactor_32x4);
1371
1372 target_8x16x4.val[0] = NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1373 target_8x16x4.val[1] = NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1374 target_8x16x4.val[2] = NEON::cast16ElementsNEON(source2_A_f_32x4, source2_B_f_32x4, source2_C_f_32x4, source2_D_f_32x4);
1375 target_8x16x4.val[3] = NEON::cast16ElementsNEON(source3_A_f_32x4, source3_B_f_32x4, source3_C_f_32x4, source3_D_f_32x4);
1376
1377 vst4q_u8(target, target_8x16x4);
1378
1379 source0 += tBlockSize;
1380 source1 += tBlockSize;
1381 source2 += tBlockSize;
1382 source3 += tBlockSize;
1383
1384 target += tBlockSize * tChannels;
1385 }
1386
1387 for (unsigned int n = 0u; n < remaining; ++n)
1388 {
1389 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1390 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1391 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
1392 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
1393
1394 target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1395 target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1396 target[n * tChannels + 2u] = uint8_t(source2[n] * sourceFactor);
1397 target[n * tChannels + 3u] = uint8_t(source3[n] * sourceFactor);
1398 }
1399 }
1400 else
1401 {
1402 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
1403 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
1404 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
1405 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[3];
1406
1407 const unsigned int blocks = width / tBlockSize;
1408 const unsigned int remaining = width % tBlockSize;
1409
1410 for (unsigned int y = 0u; y < height; ++y)
1411 {
1412 for (unsigned int n = 0u; n < blocks; ++n)
1413 {
1414 const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1415 const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1416 const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1417 const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1418
1419 const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1420 const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1421 const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1422 const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1423
1424 const float32x4_t source2_A_f_32x4 = vmulq_f32(vld1q_f32(source2 + 0), sourceFactor_32x4);
1425 const float32x4_t source2_B_f_32x4 = vmulq_f32(vld1q_f32(source2 + 4), sourceFactor_32x4);
1426 const float32x4_t source2_C_f_32x4 = vmulq_f32(vld1q_f32(source2 + 8), sourceFactor_32x4);
1427 const float32x4_t source2_D_f_32x4 = vmulq_f32(vld1q_f32(source2 + 12), sourceFactor_32x4);
1428
1429 const float32x4_t source3_A_f_32x4 = vmulq_f32(vld1q_f32(source3 + 0), sourceFactor_32x4);
1430 const float32x4_t source3_B_f_32x4 = vmulq_f32(vld1q_f32(source3 + 4), sourceFactor_32x4);
1431 const float32x4_t source3_C_f_32x4 = vmulq_f32(vld1q_f32(source3 + 8), sourceFactor_32x4);
1432 const float32x4_t source3_D_f_32x4 = vmulq_f32(vld1q_f32(source3 + 12), sourceFactor_32x4);
1433
1434 target_8x16x4.val[0] = NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1435 target_8x16x4.val[1] = NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1436 target_8x16x4.val[2] = NEON::cast16ElementsNEON(source2_A_f_32x4, source2_B_f_32x4, source2_C_f_32x4, source2_D_f_32x4);
1437 target_8x16x4.val[3] = NEON::cast16ElementsNEON(source3_A_f_32x4, source3_B_f_32x4, source3_C_f_32x4, source3_D_f_32x4);
1438
1439 vst4q_u8(target, target_8x16x4);
1440
1441 source0 += tBlockSize;
1442 source1 += tBlockSize;
1443 source2 += tBlockSize;
1444 source3 += tBlockSize;
1445
1446 target += tBlockSize * tChannels;
1447 }
1448
1449 for (unsigned int n = 0u; n < remaining; ++n)
1450 {
1451 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1452 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1453 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
1454 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
1455
1456 target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1457 target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1458 target[n * tChannels + 2u] = uint8_t(source2[n] * sourceFactor);
1459 target[n * tChannels + 3u] = uint8_t(source3[n] * sourceFactor);
1460 }
1461
1462 source0 += remaining + sourceFrame0PaddingElements;
1463 source1 += remaining + sourceFrame1PaddingElements;
1464 source2 += remaining + sourceFrame2PaddingElements;
1465 source3 += remaining + sourceFrame3PaddingElements;
1466 target += remaining * tChannels + targetFramePaddingElements;
1467 }
1468 }
1469}
1470
1471#endif // OCEAN_HARDWARE_NEON_VERSION
1472
1473}
1474
1475}
1476
1477}
1478
1479#endif // META_OCEAN_CV_ADVANCED_ADVANCED_FRAME_CHANNLES_H
This class implements advanced frame channel conversion, transformation and extraction functions.
Definition AdvancedFrameChannels.h:30
static void zipChannels(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition AdvancedFrameChannels.h:378
static void zipChannelsRuntime(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition AdvancedFrameChannels.h:571
static void zipChannelsOnlySourceFactorNEON(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const TSource sourceFactor, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
static void separateTo1ChannelOnlyTargetFactorNEON(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static void separateTo1ChannelRuntime(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition AdvancedFrameChannels.h:495
static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
Definition of a constant to specify that the number of channels is not known at compile time but at runtime only.
Definition AdvancedFrameChannels.h:36
static void separateTo1Channel(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
Definition AdvancedFrameChannels.h:254
static OCEAN_FORCE_INLINE uint8x16_t cast16ElementsNEON(const float32x4_t &sourceA_f_32x4, const float32x4_t &sourceB_f_32x4, const float32x4_t &sourceC_f_32x4, const float32x4_t &sourceD_f_32x4)
Casts 16 float elements to 16 uint8_t elements.
Definition NEON.h:1208
std::vector< Index32 > Indices32
Definition of a vector holding 32 bit index values.
Definition Base.h:96
The namespace covering the entire Ocean framework.
Definition Accessor.h:15