Ocean
AdvancedFrameChannels.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 #ifndef META_OCEAN_CV_ADVANCED_ADVANCED_FRAME_CHANNLES_H
9 #define META_OCEAN_CV_ADVANCED_ADVANCED_FRAME_CHANNLES_H
10 
12 #include "ocean/cv/NEON.h"
13 
14 #include "ocean/base/Frame.h"
15 
16 namespace Ocean
17 {
18 
19 namespace CV
20 {
21 
22 namespace Advanced
23 {
24 
25 /**
26  * This class implements advanced frame channel conversion, transformation and extraction functions.
27  * @ingroup cvadvanced
28  */
29 class OCEAN_CV_ADVANCED_EXPORT AdvancedFrameChannels
30 {
31  public:
32 
33  /**
34  * Definition of a constant to specify that the number of channels is not known at compile time but at runtime only.
35  */
36  static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME = 0u;
37 
38  /**
39  * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
40  * In addition to CV::FrameChannels::separateTo1Channel(), this function supports multiplication factors for source and target elements: each target element is determined by TTarget(sourceElement * sourceFactor) * targetFactor.<br>
41  * The function asserts that the two factors are not both 1 (identity); use CV::FrameChannels::separateTo1Channel() in that case.<br>
42  * Usage:
43  * @code
44  * const unsigned int width = ...;
45  * const unsigned int height = ...;
46  *
47  * const uint8_t* sourceFrame = ...;
48  * const unsigned int sourceFramePaddingElements = ...;
49  *
50  * constexpr unsigned int channels = 2u;
51  *
52  * float* targetFrames[channels] = {..., ...};
53  * const unsigned int targetFramesPaddingElements[channels] = {..., ...};
54  *
55  * constexpr uint8_t sourceFactor = 1u;
56  * constexpr float targetFactor = 1.0f / 255.0f;
57  *
58  * separateTo1Channel<uint8_t, float, channels>(sourceFrame, targetFrames, width, height, channels, sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements);
59  * @endcode
60  * @param sourceFrame The frame to be separated, must be valid
61  * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
62  * @param width The width of the source frame in pixel, with range [1, infinity)
63  * @param height The height of the source frame in pixel, with range [1, infinity)
64  * @param channels The number of channels the source frame has, with range [1, infinity), must be identical with 'tChannels' if 'tChannels' is known at compile time
65  * @param sourceFactor The multiplication factor each source element will be multiplied with, with range (-infinity, infinity)/{0}
66  * @param targetFactor The multiplication factor each target element will be multiplied with, with range (-infinity, infinity)/{0}
67  * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
68  * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity), nullptr if all are zero
69  * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
70  * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
71  * @tparam tChannels The number of source channels (and target frames) if known at compile time; otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME == 0, if known at compile time must be identical with 'channels'
72  */
73  template <typename TSource, typename TTarget, unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
74  static void separateTo1Channel(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements);
75 
76  /**
77  * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
78  * In addition to CV::FrameChannels::separateTo1Channel(), this function supports multiplication factors for source and target elements: each target element is determined by TTarget(sourceElement * sourceFactor) * targetFactor.<br>
79  * The number of channels is given by the number of provided target frames.<br>
80  * Usage:
81  * @code
82  * const unsigned int width = ...;
83  * const unsigned int height = ...;
84  *
85  * const uint8_t* sourceFrame = ...;
86  * const unsigned int sourceFramePaddingElements = ...;
87  *
88  * float* targetFrame0 = ...;
89  * float* targetFrame1 = ...;
90  * const unsigned int targetFramePaddingElements0 = ...;
91  * const unsigned int targetFramePaddingElements1 = ...;
92  *
93  * constexpr uint8_t sourceFactor = 1u;
94  * constexpr float targetFactor = 1.0f / 255.0f;
95  *
96  * separateTo1Channel<uint8_t, float>(sourceFrame, {targetFrame0, targetFrame1}, width, height, sourceFactor, targetFactor, sourceFramePaddingElements, {targetFramePaddingElements0, targetFramePaddingElements1});
97  * @endcode
98  * @param sourceFrame The frame to be separated, must be valid
99  * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory, at least one
100  * @param width The width of the source frame in pixel, with range [1, infinity)
101  * @param height The height of the source frame in pixel, with range [1, infinity)
102  * @param sourceFactor The multiplication factor each source element will be multiplied with, with range (-infinity, infinity)/{0}
103  * @param targetFactor The multiplication factor each target element will be multiplied with, with range (-infinity, infinity)/{0}
104  * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
105  * @param targetFramesPaddingElements The padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity), an empty list if all are zero
106  * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
107  * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
108  */
109  template <typename TSource, typename TTarget>
110  static void separateTo1Channel(const TSource* const sourceFrame, const std::initializer_list<TTarget*>& targetFrames, const unsigned int width, const unsigned int height, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const std::initializer_list<const unsigned int>& targetFramesPaddingElements);
111 
112  /**
113  * Zips/interleaves 1-channel images into one image with n-channels.
114  * In addition to CV::FrameChannels::zipChannels(), this function supports multiplication factors for source and target elements: each target element is determined by TTarget(sourceElement * sourceFactor) * targetFactor.<br>
115  * The function asserts that the two factors are not both 1 (identity).<br>
116  * Usage:
117  * @code
118  * const unsigned int width = ...;
119  * const unsigned int height = ...;
120  *
121  * const float* sourceFrames[2] = {..., ...};
122  * const unsigned int sourceFramesPaddingElements[2] = {..., ...};
123  *
124  * uint8_t* targetFrame = ...;
125  * const unsigned int targetFramePaddingElements = ...;
126  *
127  * constexpr float sourceFactor = 255.0f;
128  * constexpr uint8_t targetFactor = 1u;
129  *
130  * zipChannels<float, uint8_t>(sourceFrames, targetFrame, width, height, 2u, sourceFactor, targetFactor, sourceFramesPaddingElements, targetFramePaddingElements);
131  * @endcode
132  * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
133  * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
134  * @param width The width of the source frames in pixel, with range [1, infinity)
135  * @param height The height of the source frames in pixel, with range [1, infinity)
136  * @param channels The number of provided source frames (and the number of channels the target frame will have), with range [1, infinity), must be identical with 'tChannels' if 'tChannels' is known at compile time
137  * @param sourceFactor The multiplication factor each source element will be multiplied with, with range (-infinity, infinity)/{0}
138  * @param targetFactor The multiplication factor each target element will be multiplied with, with range (-infinity, infinity)/{0}
139  * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity), nullptr if all are zero
140  * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
141  * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
142  * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
143  * @tparam tChannels The number of source frames (and target channels) if known at compile time; otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME == 0, if known at compile time must be identical with 'channels'
144  */
145  template <typename TSource, typename TTarget, unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
146  static void zipChannels(const TSource* const* const sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
147 
148  /**
149  * Zips/interleaves 1-channel images into one image with n-channels.
150  * In addition to CV::FrameChannels::zipChannels(), this function supports multiplication factors for source and target elements: each target element is determined by TTarget(sourceElement * sourceFactor) * targetFactor.<br>
151  * The number of channels is given by the number of provided source frames.<br>
152  * Usage:
153  * @code
154  * const unsigned int width = ...;
155  * const unsigned int height = ...;
156  *
157  * const float* sourceFrame0 = ...;
158  * const float* sourceFrame1 = ...;
159  * const unsigned int sourceFramePaddingElements0 = ...;
160  * const unsigned int sourceFramePaddingElements1 = ...;
161  *
162  * uint8_t* targetFrame = ...;
163  * const unsigned int targetFramePaddingElements = ...;
164  *
165  * constexpr float sourceFactor = 255.0f;
166  * constexpr uint8_t targetFactor = 1u;
167  *
168  * zipChannels<float, uint8_t>({sourceFrame0, sourceFrame1}, targetFrame, width, height, sourceFactor, targetFactor, {sourceFramePaddingElements0, sourceFramePaddingElements1}, targetFramePaddingElements);
169  * @endcode
170  * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid, at least one
171  * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
172  * @param width The width of the source frames in pixel, with range [1, infinity)
173  * @param height The height of the source frames in pixel, with range [1, infinity)
174  * @param sourceFactor The multiplication factor each source element will be multiplied with, with range (-infinity, infinity)/{0}
175  * @param targetFactor The multiplication factor each target element will be multiplied with, with range (-infinity, infinity)/{0}
176  * @param sourceFramesPaddingElements The padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity), an empty list if all are zero
177  * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
178  * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
179  * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
180  */
181  template <typename TSource, typename TTarget>
182  static void zipChannels(const std::initializer_list<const TSource*>& sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const TSource sourceFactor, const TTarget targetFactor, const std::initializer_list<unsigned int>& sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
183 
184  protected:
185 
186  /**
187  * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
188  * This is the generic implementation for a channel number which is known at runtime only; each target element is determined by TTarget(sourceElement * sourceFactor) * targetFactor.
189  * @param sourceFrame The frame to be separated, must be valid
190  * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
191  * @param width The width of the source frame in pixel, with range [1, infinity)
192  * @param height The height of the source frame in pixel, with range [1, infinity)
193  * @param channels The number of channels the source frame has, with range [1, infinity)
194  * @param sourceFactor The multiplication factor each source element will be multiplied with, with range (-infinity, infinity)/{0}
195  * @param targetFactor The multiplication factor each target element will be multiplied with, with range (-infinity, infinity)/{0}
196  * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
197  * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity), nullptr if all are zero
198  * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
199  * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
200  */
201  template <typename TSource, typename TTarget>
202  static void separateTo1ChannelRuntime(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements);
203 
204  /**
205  * Zips/interleaves 1-channel images into one image with n-channels.
206  * This is the generic implementation for a channel number which is known at runtime only.
207  * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
208  * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
209  * @param width The width of the source frames in pixel, with range [1, infinity)
210  * @param height The height of the source frames in pixel, with range [1, infinity)
211  * @param channels The number of provided source frames (and the number of channels the target frame will have), with range [1, infinity)
212  * @param sourceFactor The multiplication factor each source element will be multiplied with, with range (-infinity, infinity)/{0}
213  * @param targetFactor The multiplication factor each target element will be multiplied with, with range (-infinity, infinity)/{0}
214  * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity), can be nullptr
215  * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
216  * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
217  * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
218  */
219  template <typename TSource, typename TTarget>
220  static void zipChannelsRuntime(const TSource* const* const sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
221 
222 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
223 
224  /**
225  * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
226  * NEON-based variant applying a target factor only (no source factor); separateTo1Channel() forwards to this function for 'uint8_t' sources, 'float' targets, 2 to 4 channels, and a source factor of 1.
227  * @param sourceFrame The frame to be separated, must be valid
228  * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
229  * @param width The width of the source frame in pixel, with range [1, infinity)
230  * @param height The height of the source frame in pixel, with range [1, infinity)
231  * @param targetFactor The multiplication factor each target element will be multiplied with, with range (-infinity, infinity)/{0}
232  * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
233  * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity), nullptr if all are zero
234  * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
235  * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
236  * @tparam tChannels The number of source channels (and target frames), must be known at compile time
237  */
238  template <typename TSource, typename TTarget, unsigned int tChannels>
239  static void separateTo1ChannelOnlyTargetFactorNEON(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements);
240 
241  /**
242  * Zips/interleaves 1-channel images into one image with n-channels.
243  * NEON-based variant applying a source factor only (no target factor); zipChannels() forwards to this function for 'float' sources, 'uint8_t' targets, 2 to 4 channels, and a target factor of 1.
244  * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
245  * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
246  * @param width The width of the source frames in pixel, with range [1, infinity)
247  * @param height The height of the source frames in pixel, with range [1, infinity)
248  * @param sourceFactor The multiplication factor each source element will be multiplied with, with range (-infinity, infinity)/{0}
249  * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity)
250  * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
251  * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
252  * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
253  * @tparam tChannels The number of source frames (and target channels), must be known at compile time
254  */
255  template <typename TSource, typename TTarget, unsigned int tChannels>
256  static void zipChannelsOnlySourceFactorNEON(const TSource* const* const sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const TSource sourceFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
257 
258 #endif // OCEAN_HARDWARE_NEON_VERSION
259 };
252 
253 template <typename TSource, typename TTarget, unsigned int tChannels>
254 void AdvancedFrameChannels::separateTo1Channel(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
255 {
256  ocean_assert(sourceFrame != nullptr);
257  ocean_assert(targetFrames != nullptr);
258 
259  ocean_assert(width != 0u && height != 0u);
260 
261  assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
262  assert(sourceFactor != TSource(1) || targetFactor != TTarget(1)); // Identity factors, use FrameChannels::separateTo1Channel() instead!
263 
264  ocean_assert(tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME || tChannels == channels);
265 
266  if constexpr (tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME)
267  {
268  separateTo1ChannelRuntime<TSource, TTarget>(sourceFrame, targetFrames, width, height, channels, sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements);
269  return;
270  }
271 
272 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
273 
274  if constexpr (std::is_same<TSource, uint8_t>::value && std::is_same<TTarget, float>::value && tChannels >= 2u && tChannels <= 4u)
275  {
276  if (sourceFactor == uint8_t(1))
277  {
278  separateTo1ChannelOnlyTargetFactorNEON<TSource, TTarget, tChannels>(sourceFrame, targetFrames, width, height, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements);
279 
280  return;
281  }
282  }
283 
284 #endif // OCEAN_HARDWARE_NEON_VERSION
285 
286 #ifdef OCEAN_DEBUG
287  for (unsigned int c = 0u; c < tChannels; ++c)
288  {
289  ocean_assert(targetFrames[c] != nullptr);
290  }
291 #endif
292 
293  if (sourceFramePaddingElements == 0u && targetFramesPaddingElements == nullptr)
294  {
295  for (unsigned int n = 0u; n < width * height; ++n)
296  {
297  for (unsigned int c = 0u; c < tChannels; ++c)
298  {
299  targetFrames[c][n] = TTarget(sourceFrame[n * tChannels + c] * sourceFactor) * targetFactor;
300  }
301  }
302  }
303  else if (targetFramesPaddingElements == nullptr)
304  {
305  ocean_assert(sourceFramePaddingElements != 0u);
306 
307  const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
308 
309  for (unsigned int y = 0u; y < height; ++y)
310  {
311  const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
312 
313  const unsigned int targetRowOffset = y * width;
314 
315  for (unsigned int x = 0u; x < width; ++x)
316  {
317  for (unsigned int c = 0u; c < tChannels; ++c)
318  {
319  *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * tChannels + c) * sourceFactor) * targetFactor;
320  }
321  }
322  }
323  }
324  else
325  {
326  const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
327 
328  Indices32 targetFrameStrideElements(tChannels);
329 
330  for (unsigned int c = 0u; c < tChannels; ++c)
331  {
332  targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
333  }
334 
335  for (unsigned int y = 0u; y < height; ++y)
336  {
337  const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
338 
339  for (unsigned int x = 0u; x < width; ++x)
340  {
341  for (unsigned int c = 0u; c < tChannels; ++c)
342  {
343  *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * tChannels + c) * sourceFactor) * targetFactor;
344  }
345  }
346  }
347  }
348 }
349 
350 template <typename TSource, typename TTarget>
351 void AdvancedFrameChannels::separateTo1Channel(const TSource* const sourceFrame, const std::initializer_list<TTarget*>& targetFrames, const unsigned int width, const unsigned int height, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const std::initializer_list<const unsigned int>& targetFramesPaddingElements)
352 {
353  ocean_assert(targetFrames.size() >= 1);
354  ocean_assert(targetFramesPaddingElements.size() == 0 || targetFrames.size() == targetFramesPaddingElements.size());
355 
356  assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
357  assert(sourceFactor != TSource(1) || targetFactor != TTarget(1)); // Identity factors, use FrameChannels::separateTo1Channel() instead!
358 
359  if (targetFrames.size() == 2)
360  {
361  separateTo1Channel<TSource, TTarget, 2u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
362  }
363  else if (targetFrames.size() == 3)
364  {
365  separateTo1Channel<TSource, TTarget, 3u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
366  }
367  else if (targetFrames.size() == 4)
368  {
369  separateTo1Channel<TSource, TTarget, 4u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
370  }
371  else
372  {
373  separateTo1Channel<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
374  }
375 }
376 
377 template <typename TSource, typename TTarget, unsigned int tChannels>
378 void AdvancedFrameChannels::zipChannels(const TSource* const* sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
379 {
380  ocean_assert(sourceFrames != nullptr);
381  ocean_assert(targetFrame != nullptr);
382 
383  ocean_assert(width != 0u && height != 0u);
384 
385  assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
386  assert(sourceFactor != TSource(1) || targetFactor != TTarget(1)); // Identity factors, use FrameChannels::separateTo1Channel() instead!
387 
388  ocean_assert(tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME || tChannels == channels);
389 
390  if constexpr (tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME)
391  {
392  zipChannelsRuntime<TSource, TTarget>(sourceFrames, targetFrame, width, height, channels, sourceFactor, targetFactor, sourceFramesPaddingElements, targetFramePaddingElements);
393  return;
394  }
395 
396 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
397 
398  if constexpr (std::is_same<TSource, float>::value && std::is_same<TTarget, uint8_t>::value && tChannels >= 2u && tChannels <= 4u)
399  {
400  if (targetFactor == uint8_t(1))
401  {
402  zipChannelsOnlySourceFactorNEON<TSource, TTarget, tChannels>(sourceFrames, targetFrame, width, height, sourceFactor, sourceFramesPaddingElements, targetFramePaddingElements);
403 
404  return;
405  }
406  }
407 
408 #endif // OCEAN_HARDWARE_NEON_VERSION
409 
410  bool allSourceFramesContinuous = true;
411 
412  if (sourceFramesPaddingElements != nullptr)
413  {
414  for (unsigned int n = 0u; n < tChannels; ++n)
415  {
416  if (sourceFramesPaddingElements[n] != 0u)
417  {
418  allSourceFramesContinuous = false;
419  break;
420  }
421  }
422  }
423 
424  if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
425  {
426  for (unsigned int n = 0u; n < width * height; ++n)
427  {
428  for (unsigned int c = 0u; c < tChannels; ++c)
429  {
430  targetFrame[n * tChannels + c] = TTarget(sourceFrames[c][n] * sourceFactor) * targetFactor;
431  }
432  }
433  }
434  else
435  {
436  const unsigned int targetFrameStrideElements = width * tChannels + targetFramePaddingElements;
437 
438  Indices32 sourceFrameStrideElements(tChannels);
439 
440  for (unsigned int c = 0u; c < tChannels; ++c)
441  {
442  if (sourceFramesPaddingElements == nullptr)
443  {
444  sourceFrameStrideElements[c] = width;
445  }
446  else
447  {
448  sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
449  }
450  }
451 
452  for (unsigned int y = 0u; y < height; ++y)
453  {
454  TTarget* const targetRow = targetFrame + y * targetFrameStrideElements;
455 
456  for (unsigned int x = 0u; x < width; ++x)
457  {
458  for (unsigned int c = 0u; c < tChannels; ++c)
459  {
460  *(targetRow + x * tChannels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x) * sourceFactor) * targetFactor;
461  }
462  }
463  }
464  }
465 }
466 
467 template <typename TSource, typename TTarget>
468 void AdvancedFrameChannels::zipChannels(const std::initializer_list<const TSource*>& sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const TSource sourceFactor, const TTarget targetFactor, const std::initializer_list<unsigned int>& sourceFramePaddingElements, const unsigned int targetFramePaddingElements)
469 {
470  ocean_assert(sourceFrames.size() >= 1);
471  ocean_assert(sourceFramePaddingElements.size() == 0 || sourceFrames.size() == sourceFramePaddingElements.size());
472 
473  assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
474  assert(sourceFactor != TSource(1) || targetFactor != TTarget(1)); // Identity factors, use FrameChannels::separateTo1Channel() instead!
475 
476  if (sourceFrames.size() == 2)
477  {
478  zipChannels<TSource, TTarget, 2u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
479  }
480  else if (sourceFrames.size() == 3)
481  {
482  zipChannels<TSource, TTarget, 3u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
483  }
484  else if (sourceFrames.size() == 4)
485  {
486  zipChannels<TSource, TTarget, 4u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
487  }
488  else
489  {
490  zipChannels<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
491  }
492 }
493 
494 template <typename TSource, typename TTarget>
495 void AdvancedFrameChannels::separateTo1ChannelRuntime(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
496 {
497  ocean_assert(sourceFrame != nullptr);
498  ocean_assert(targetFrames != nullptr);
499 
500  ocean_assert(width != 0u && height != 0u);
501  ocean_assert(channels != 0u);
502 
503  assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
504  assert(sourceFactor != TSource(1) || targetFactor != TTarget(1)); // Identity factors, use FrameChannels::separateTo1Channel() instead!
505 
506 #ifdef OCEAN_DEBUG
507  for (unsigned int c = 0u; c < channels; ++c)
508  {
509  ocean_assert(targetFrames[c] != nullptr);
510  }
511 #endif
512 
513  if (sourceFramePaddingElements == 0u && targetFramesPaddingElements == nullptr)
514  {
515  for (unsigned int n = 0u; n < width * height; ++n)
516  {
517  for (unsigned int c = 0u; c < channels; ++c)
518  {
519  targetFrames[c][n] = TTarget(sourceFrame[n * channels + c] * sourceFactor) * targetFactor;
520  }
521  }
522  }
523  else if (targetFramesPaddingElements == nullptr)
524  {
525  ocean_assert(sourceFramePaddingElements != 0u);
526 
527  const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
528 
529  for (unsigned int y = 0u; y < height; ++y)
530  {
531  const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
532 
533  const unsigned int targetRowOffset = y * width;
534 
535  for (unsigned int x = 0u; x < width; ++x)
536  {
537  for (unsigned int c = 0u; c < channels; ++c)
538  {
539  *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * channels + c) * sourceFactor) * targetFactor;
540  }
541  }
542  }
543  }
544  else
545  {
546  const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
547 
548  Indices32 targetFrameStrideElements(channels);
549 
550  for (unsigned int c = 0u; c < channels; ++c)
551  {
552  targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
553  }
554 
555  for (unsigned int y = 0u; y < height; ++y)
556  {
557  const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
558 
559  for (unsigned int x = 0u; x < width; ++x)
560  {
561  for (unsigned int c = 0u; c < channels; ++c)
562  {
563  *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * channels + c) * sourceFactor) * targetFactor;
564  }
565  }
566  }
567  }
568 }
569 
570 template <typename TSource, typename TTarget>
571 void AdvancedFrameChannels::zipChannelsRuntime(const TSource* const* sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
572 {
573  ocean_assert(sourceFrames != nullptr);
574  ocean_assert(targetFrame != nullptr);
575 
576  ocean_assert(width != 0u && height != 0u);
577  ocean_assert(channels != 0u);
578 
579  assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
580  assert(sourceFactor != TSource(1) || targetFactor != TTarget(1)); // Identity factors, use FrameChannels::separateTo1Channel() instead!
581 
582  bool allSourceFramesContinuous = true;
583 
584  if (sourceFramesPaddingElements != nullptr)
585  {
586  for (unsigned int n = 0u; n < channels; ++n)
587  {
588  if (sourceFramesPaddingElements[n] != 0u)
589  {
590  allSourceFramesContinuous = false;
591  break;
592  }
593  }
594  }
595 
596  if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
597  {
598  for (unsigned int n = 0u; n < width * height; ++n)
599  {
600  for (unsigned int c = 0u; c < channels; ++c)
601  {
602  targetFrame[n * channels + c] = TTarget(sourceFrames[c][n] * sourceFactor) * targetFactor;
603  }
604  }
605  }
606  else
607  {
608  const unsigned int targetFrameStrideElements = width * channels + targetFramePaddingElements;
609 
610  Indices32 sourceFrameStrideElements(channels);
611 
612  for (unsigned int c = 0u; c < channels; ++c)
613  {
614  if (sourceFramesPaddingElements == nullptr)
615  {
616  sourceFrameStrideElements[c] = width;
617  }
618  else
619  {
620  sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
621  }
622  }
623 
624  for (unsigned int y = 0u; y < height; ++y)
625  {
626  TTarget* const targetRow = targetFrame + y * targetFrameStrideElements;
627 
628  for (unsigned int x = 0u; x < width; ++x)
629  {
630  for (unsigned int c = 0u; c < channels; ++c)
631  {
632  *(targetRow + x * channels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x) * sourceFactor) * targetFactor;
633  }
634  }
635  }
636  }
637 }
638 
639 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
640 
641 template <>
642 inline void AdvancedFrameChannels::separateTo1ChannelOnlyTargetFactorNEON<uint8_t, float, 2u>(const uint8_t* const sourceFrame, float* const* const targetFrames, const unsigned int width, const unsigned int height, const float targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
643 {
644  ocean_assert(sourceFrame != nullptr);
645  ocean_assert(targetFrames != nullptr);
646 
647  ocean_assert(width != 0u && height != 0u);
648 
649  ocean_assert(targetFactor != 0.0f);
650 
651  constexpr unsigned int tChannels = 2u;
652 
653  bool allTargetFramesContinuous = true;
654 
655  if (targetFramesPaddingElements != nullptr)
656  {
657  for (unsigned int n = 0u; n < tChannels; ++n)
658  {
659  if (targetFramesPaddingElements[n] != 0u)
660  {
661  allTargetFramesContinuous = false;
662  break;
663  }
664  }
665  }
666 
667  const float32x4_t targetFactor_32x4 = vdupq_n_f32(targetFactor);
668 
669  const uint8_t* source = sourceFrame;
670  float* target0 = targetFrames[0];
671  float* target1 = targetFrames[1];
672 
673  constexpr unsigned int tBlockSize = 16u;
674 
675  uint8x16x2_t source_u_8x16x2;
676 
677  if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
678  {
679  const unsigned int pixels = width * height;
680  const unsigned int blocks = pixels / tBlockSize;
681  const unsigned int remaining = pixels % tBlockSize;
682 
683  for (unsigned int n = 0u; n < blocks; ++n)
684  {
685  source_u_8x16x2 = vld2q_u8(source);
686 
687  const float32x4x4_t sourceA_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x2.val[0]);
688  const float32x4x4_t sourceB_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x2.val[1]);
689 
690  vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
691  vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
692  vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
693  vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
694 
695  vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
696  vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
697  vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
698  vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
699 
700  source += tBlockSize * tChannels;
701 
702  target0 += tBlockSize;
703  target1 += tBlockSize;
704  }
705 
706  for (unsigned int n = 0u; n < remaining; ++n)
707  {
708  target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
709  target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
710  }
711  }
712  else
713  {
714  const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
715  const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
716 
717  const unsigned int blocks = width / tBlockSize;
718  const unsigned int remaining = width % tBlockSize;
719 
720  for (unsigned int y = 0u; y < height; ++y)
721  {
722  for (unsigned int n = 0u; n < blocks; ++n)
723  {
724  source_u_8x16x2 = vld2q_u8(source);
725 
726  const float32x4x4_t sourceA_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x2.val[0]);
727  const float32x4x4_t sourceB_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x2.val[1]);
728 
729  vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
730  vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
731  vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
732  vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
733 
734  vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
735  vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
736  vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
737  vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
738 
739  source += tBlockSize * tChannels;
740 
741  target0 += tBlockSize;
742  target1 += tBlockSize;
743  }
744 
745  for (unsigned int n = 0u; n < remaining; ++n)
746  {
747  target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
748  target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
749  }
750 
751  source += remaining * tChannels + sourceFramePaddingElements;
752  target0 += remaining + targetFrame0PaddingElements;
753  target1 += remaining + targetFrame1PaddingElements;
754  }
755  }
756 }
757 
758 template <>
759 inline void AdvancedFrameChannels::separateTo1ChannelOnlyTargetFactorNEON<uint8_t, float, 3u>(const uint8_t* const sourceFrame, float* const* const targetFrames, const unsigned int width, const unsigned int height, const float targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
760 {
761  ocean_assert(sourceFrame != nullptr);
762  ocean_assert(targetFrames != nullptr);
763 
764  ocean_assert(width != 0u && height != 0u);
765 
766  constexpr unsigned int tChannels = 3u;
767 
768  bool allTargetFramesContinuous = true;
769 
770  if (targetFramesPaddingElements != nullptr)
771  {
772  for (unsigned int n = 0u; n < tChannels; ++n)
773  {
774  if (targetFramesPaddingElements[n] != 0u)
775  {
776  allTargetFramesContinuous = false;
777  break;
778  }
779  }
780  }
781 
782  const float32x4_t targetFactor_32x4 = vdupq_n_f32(targetFactor);
783 
784  const uint8_t* source = sourceFrame;
785  float* target0 = targetFrames[0];
786  float* target1 = targetFrames[1];
787  float* target2 = targetFrames[2];
788 
789  constexpr unsigned int tBlockSize = 16u;
790 
791  uint8x16x3_t source_u_8x16x3;
792 
793  if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
794  {
795  const unsigned int pixels = width * height;
796  const unsigned int blocks = pixels / tBlockSize;
797  const unsigned int remaining = pixels % tBlockSize;
798 
799  for (unsigned int n = 0u; n < blocks; ++n)
800  {
801  source_u_8x16x3 = vld3q_u8(source);
802 
803  const float32x4x4_t sourceA_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x3.val[0]);
804  const float32x4x4_t sourceB_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x3.val[1]);
805  const float32x4x4_t sourceC_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x3.val[2]);
806 
807  vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
808  vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
809  vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
810  vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
811 
812  vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
813  vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
814  vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
815  vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
816 
817  vst1q_f32(target2 + 0, vmulq_f32(sourceC_f_32x4x4.val[0], targetFactor_32x4));
818  vst1q_f32(target2 + 4, vmulq_f32(sourceC_f_32x4x4.val[1], targetFactor_32x4));
819  vst1q_f32(target2 + 8, vmulq_f32(sourceC_f_32x4x4.val[2], targetFactor_32x4));
820  vst1q_f32(target2 + 12, vmulq_f32(sourceC_f_32x4x4.val[3], targetFactor_32x4));
821 
822  source += tBlockSize * tChannels;
823 
824  target0 += tBlockSize;
825  target1 += tBlockSize;
826  target2 += tBlockSize;
827  }
828 
829  for (unsigned int n = 0u; n < remaining; ++n)
830  {
831  target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
832  target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
833  target2[n] = float(source[n * tChannels + 2u]) * targetFactor;
834  }
835  }
836  else
837  {
838  const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
839  const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
840  const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[2];
841 
842  const unsigned int blocks = width / tBlockSize;
843  const unsigned int remaining = width % tBlockSize;
844 
845  for (unsigned int y = 0u; y < height; ++y)
846  {
847  for (unsigned int n = 0u; n < blocks; ++n)
848  {
849  source_u_8x16x3 = vld3q_u8(source);
850 
851  const float32x4x4_t sourceA_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x3.val[0]);
852  const float32x4x4_t sourceB_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x3.val[1]);
853  const float32x4x4_t sourceC_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x3.val[2]);
854 
855  vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
856  vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
857  vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
858  vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
859 
860  vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
861  vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
862  vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
863  vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
864 
865  vst1q_f32(target2 + 0, vmulq_f32(sourceC_f_32x4x4.val[0], targetFactor_32x4));
866  vst1q_f32(target2 + 4, vmulq_f32(sourceC_f_32x4x4.val[1], targetFactor_32x4));
867  vst1q_f32(target2 + 8, vmulq_f32(sourceC_f_32x4x4.val[2], targetFactor_32x4));
868  vst1q_f32(target2 + 12, vmulq_f32(sourceC_f_32x4x4.val[3], targetFactor_32x4));
869 
870  source += tBlockSize * tChannels;
871 
872  target0 += tBlockSize;
873  target1 += tBlockSize;
874  target2 += tBlockSize;
875  }
876 
877  for (unsigned int n = 0u; n < remaining; ++n)
878  {
879  target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
880  target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
881  target2[n] = float(source[n * tChannels + 2u]) * targetFactor;
882  }
883 
884  source += remaining * tChannels + sourceFramePaddingElements;
885  target0 += remaining + targetFrame0PaddingElements;
886  target1 += remaining + targetFrame1PaddingElements;
887  target2 += remaining + targetFrame2PaddingElements;
888  }
889  }
890 }
891 
892 template <>
893 inline void AdvancedFrameChannels::separateTo1ChannelOnlyTargetFactorNEON<uint8_t, float, 4u>(const uint8_t* const sourceFrame, float* const* const targetFrames, const unsigned int width, const unsigned int height, const float targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
894 {
895  ocean_assert(sourceFrame != nullptr);
896  ocean_assert(targetFrames != nullptr);
897 
898  ocean_assert(width != 0u && height != 0u);
899 
900  constexpr unsigned int tChannels = 4u;
901 
902  bool allTargetFramesContinuous = true;
903 
904  if (targetFramesPaddingElements != nullptr)
905  {
906  for (unsigned int n = 0u; n < tChannels; ++n)
907  {
908  if (targetFramesPaddingElements[n] != 0u)
909  {
910  allTargetFramesContinuous = false;
911  break;
912  }
913  }
914  }
915 
916  const float32x4_t targetFactor_32x4 = vdupq_n_f32(targetFactor);
917 
918  const uint8_t* source = sourceFrame;
919  float* target0 = targetFrames[0];
920  float* target1 = targetFrames[1];
921  float* target2 = targetFrames[2];
922  float* target3 = targetFrames[3];
923 
924  constexpr unsigned int tBlockSize = 16u;
925 
926  uint8x16x4_t source_u_8x16x4;
927 
928  if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
929  {
930  const unsigned int pixels = width * height;
931  const unsigned int blocks = pixels / tBlockSize;
932  const unsigned int remaining = pixels % tBlockSize;
933 
934  for (unsigned int n = 0u; n < blocks; ++n)
935  {
936  source_u_8x16x4 = vld4q_u8(source);
937 
938  const float32x4x4_t sourceA_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[0]);
939  const float32x4x4_t sourceB_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[1]);
940  const float32x4x4_t sourceC_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[2]);
941  const float32x4x4_t sourceD_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[3]);
942 
943  vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
944  vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
945  vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
946  vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
947 
948  vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
949  vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
950  vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
951  vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
952 
953  vst1q_f32(target2 + 0, vmulq_f32(sourceC_f_32x4x4.val[0], targetFactor_32x4));
954  vst1q_f32(target2 + 4, vmulq_f32(sourceC_f_32x4x4.val[1], targetFactor_32x4));
955  vst1q_f32(target2 + 8, vmulq_f32(sourceC_f_32x4x4.val[2], targetFactor_32x4));
956  vst1q_f32(target2 + 12, vmulq_f32(sourceC_f_32x4x4.val[3], targetFactor_32x4));
957 
958  vst1q_f32(target3 + 0, vmulq_f32(sourceD_f_32x4x4.val[0], targetFactor_32x4));
959  vst1q_f32(target3 + 4, vmulq_f32(sourceD_f_32x4x4.val[1], targetFactor_32x4));
960  vst1q_f32(target3 + 8, vmulq_f32(sourceD_f_32x4x4.val[2], targetFactor_32x4));
961  vst1q_f32(target3 + 12, vmulq_f32(sourceD_f_32x4x4.val[3], targetFactor_32x4));
962 
963  source += tBlockSize * tChannels;
964 
965  target0 += tBlockSize;
966  target1 += tBlockSize;
967  target2 += tBlockSize;
968  target3 += tBlockSize;
969  }
970 
971  for (unsigned int n = 0u; n < remaining; ++n)
972  {
973  target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
974  target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
975  target2[n] = float(source[n * tChannels + 2u]) * targetFactor;
976  target3[n] = float(source[n * tChannels + 3u]) * targetFactor;
977  }
978  }
979  else
980  {
981  const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
982  const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
983  const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[2];
984  const unsigned int targetFrame3PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[3];
985 
986  const unsigned int blocks = width / tBlockSize;
987  const unsigned int remaining = width % tBlockSize;
988 
989  for (unsigned int y = 0u; y < height; ++y)
990  {
991  for (unsigned int n = 0u; n < blocks; ++n)
992  {
993  source_u_8x16x4 = vld4q_u8(source);
994 
995  const float32x4x4_t sourceA_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[0]);
996  const float32x4x4_t sourceB_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[1]);
997  const float32x4x4_t sourceC_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[2]);
998  const float32x4x4_t sourceD_f_32x4x4 = NEON::cast16ElementsNEON(source_u_8x16x4.val[3]);
999 
1000  vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
1001  vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
1002  vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
1003  vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
1004 
1005  vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
1006  vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
1007  vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
1008  vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
1009 
1010  vst1q_f32(target2 + 0, vmulq_f32(sourceC_f_32x4x4.val[0], targetFactor_32x4));
1011  vst1q_f32(target2 + 4, vmulq_f32(sourceC_f_32x4x4.val[1], targetFactor_32x4));
1012  vst1q_f32(target2 + 8, vmulq_f32(sourceC_f_32x4x4.val[2], targetFactor_32x4));
1013  vst1q_f32(target2 + 12, vmulq_f32(sourceC_f_32x4x4.val[3], targetFactor_32x4));
1014 
1015  vst1q_f32(target3 + 0, vmulq_f32(sourceD_f_32x4x4.val[0], targetFactor_32x4));
1016  vst1q_f32(target3 + 4, vmulq_f32(sourceD_f_32x4x4.val[1], targetFactor_32x4));
1017  vst1q_f32(target3 + 8, vmulq_f32(sourceD_f_32x4x4.val[2], targetFactor_32x4));
1018  vst1q_f32(target3 + 12, vmulq_f32(sourceD_f_32x4x4.val[3], targetFactor_32x4));
1019 
1020  source += tBlockSize * tChannels;
1021 
1022  target0 += tBlockSize;
1023  target1 += tBlockSize;
1024  target2 += tBlockSize;
1025  target3 += tBlockSize;
1026  }
1027 
1028  for (unsigned int n = 0u; n < remaining; ++n)
1029  {
1030  target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
1031  target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
1032  target2[n] = float(source[n * tChannels + 2u]) * targetFactor;
1033  target3[n] = float(source[n * tChannels + 3u]) * targetFactor;
1034  }
1035 
1036  source += remaining * tChannels + sourceFramePaddingElements;
1037  target0 += remaining + targetFrame0PaddingElements;
1038  target1 += remaining + targetFrame1PaddingElements;
1039  target2 += remaining + targetFrame2PaddingElements;
1040  target3 += remaining + targetFrame3PaddingElements;
1041  }
1042  }
1043 }
1044 
1045 template <>
1046 void AdvancedFrameChannels::zipChannelsOnlySourceFactorNEON<float, uint8_t, 2u>(const float* const* const sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const float sourceFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
1047 {
1048  ocean_assert(sourceFrames != nullptr);
1049  ocean_assert(targetFrame != nullptr);
1050 
1051  ocean_assert(width != 0u && height != 0u);
1052 
1053  constexpr unsigned int tChannels = 2u;
1054 
1055  bool allSourceFramesContinuous = true;
1056 
1057  if (sourceFramesPaddingElements != nullptr)
1058  {
1059  for (unsigned int n = 0u; n < tChannels; ++n)
1060  {
1061  if (sourceFramesPaddingElements[n] != 0u)
1062  {
1063  allSourceFramesContinuous = false;
1064  break;
1065  }
1066  }
1067  }
1068 
1069  const float32x4_t sourceFactor_32x4 = vdupq_n_f32(sourceFactor);
1070 
1071  const float* source0 = sourceFrames[0];
1072  const float* source1 = sourceFrames[1];
1073  uint8_t* target = targetFrame;
1074 
1075  constexpr unsigned int tBlockSize = 16u;
1076 
1077  uint8x16x2_t target_8x16x2;
1078 
1079  if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1080  {
1081  const unsigned int pixels = width * height;
1082  const unsigned int blocks = pixels / tBlockSize;
1083  const unsigned int remaining = pixels % tBlockSize;
1084 
1085  for (unsigned int n = 0u; n < blocks; ++n)
1086  {
1087  const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1088  const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1089  const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1090  const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1091 
1092  const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1093  const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1094  const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1095  const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1096 
1097  target_8x16x2.val[0] = NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1098  target_8x16x2.val[1] = NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1099 
1100  vst2q_u8(target, target_8x16x2);
1101 
1102  source0 += tBlockSize;
1103  source1 += tBlockSize;
1104 
1105  target += tBlockSize * tChannels;
1106  }
1107 
1108  for (unsigned int n = 0u; n < remaining; ++n)
1109  {
1110  ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1111  ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1112 
1113  target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1114  target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1115  }
1116  }
1117  else
1118  {
1119  const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
1120  const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
1121 
1122  const unsigned int blocks = width / tBlockSize;
1123  const unsigned int remaining = width % tBlockSize;
1124 
1125  for (unsigned int y = 0u; y < height; ++y)
1126  {
1127  for (unsigned int n = 0u; n < blocks; ++n)
1128  {
1129  const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1130  const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1131  const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1132  const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1133 
1134  const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1135  const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1136  const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1137  const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1138 
1139  target_8x16x2.val[0] = NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1140  target_8x16x2.val[1] = NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1141 
1142  vst2q_u8(target, target_8x16x2);
1143 
1144  source0 += tBlockSize;
1145  source1 += tBlockSize;
1146 
1147  target += tBlockSize * tChannels;
1148  }
1149 
1150  for (unsigned int n = 0u; n < remaining; ++n)
1151  {
1152  ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1153  ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1154 
1155  target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1156  target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1157  }
1158 
1159  source0 += remaining + sourceFrame0PaddingElements;
1160  source1 += remaining + sourceFrame1PaddingElements;
1161  target += remaining * tChannels + targetFramePaddingElements;
1162  }
1163  }
1164 }
1165 
1166 template <>
1167 void AdvancedFrameChannels::zipChannelsOnlySourceFactorNEON<float, uint8_t, 3u>(const float* const* const sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const float sourceFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
1168 {
1169  ocean_assert(sourceFrames != nullptr);
1170  ocean_assert(targetFrame != nullptr);
1171 
1172  ocean_assert(width != 0u && height != 0u);
1173 
1174  constexpr unsigned int tChannels = 3u;
1175 
1176  bool allSourceFramesContinuous = true;
1177 
1178  if (sourceFramesPaddingElements != nullptr)
1179  {
1180  for (unsigned int n = 0u; n < tChannels; ++n)
1181  {
1182  if (sourceFramesPaddingElements[n] != 0u)
1183  {
1184  allSourceFramesContinuous = false;
1185  break;
1186  }
1187  }
1188  }
1189 
1190  const float32x4_t sourceFactor_32x4 = vdupq_n_f32(sourceFactor);
1191 
1192  const float* source0 = sourceFrames[0];
1193  const float* source1 = sourceFrames[1];
1194  const float* source2 = sourceFrames[2];
1195  uint8_t* target = targetFrame;
1196 
1197  constexpr unsigned int tBlockSize = 16u;
1198 
1199  uint8x16x3_t target_8x16x3;
1200 
1201  if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1202  {
1203  const unsigned int pixels = width * height;
1204  const unsigned int blocks = pixels / tBlockSize;
1205  const unsigned int remaining = pixels % tBlockSize;
1206 
1207  for (unsigned int n = 0u; n < blocks; ++n)
1208  {
1209  const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1210  const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1211  const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1212  const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1213 
1214  const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1215  const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1216  const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1217  const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1218 
1219  const float32x4_t source2_A_f_32x4 = vmulq_f32(vld1q_f32(source2 + 0), sourceFactor_32x4);
1220  const float32x4_t source2_B_f_32x4 = vmulq_f32(vld1q_f32(source2 + 4), sourceFactor_32x4);
1221  const float32x4_t source2_C_f_32x4 = vmulq_f32(vld1q_f32(source2 + 8), sourceFactor_32x4);
1222  const float32x4_t source2_D_f_32x4 = vmulq_f32(vld1q_f32(source2 + 12), sourceFactor_32x4);
1223 
1224  target_8x16x3.val[0] = NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1225  target_8x16x3.val[1] = NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1226  target_8x16x3.val[2] = NEON::cast16ElementsNEON(source2_A_f_32x4, source2_B_f_32x4, source2_C_f_32x4, source2_D_f_32x4);
1227 
1228  vst3q_u8(target, target_8x16x3);
1229 
1230  source0 += tBlockSize;
1231  source1 += tBlockSize;
1232  source2 += tBlockSize;
1233 
1234  target += tBlockSize * tChannels;
1235  }
1236 
1237  for (unsigned int n = 0u; n < remaining; ++n)
1238  {
1239  ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1240  ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1241  ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
1242 
1243  target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1244  target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1245  target[n * tChannels + 2u] = uint8_t(source2[n] * sourceFactor);
1246  }
1247  }
1248  else
1249  {
1250  const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
1251  const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
1252  const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
1253 
1254  const unsigned int blocks = width / tBlockSize;
1255  const unsigned int remaining = width % tBlockSize;
1256 
1257  for (unsigned int y = 0u; y < height; ++y)
1258  {
1259  for (unsigned int n = 0u; n < blocks; ++n)
1260  {
1261  const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1262  const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1263  const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1264  const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1265 
1266  const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1267  const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1268  const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1269  const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1270 
1271  const float32x4_t source2_A_f_32x4 = vmulq_f32(vld1q_f32(source2 + 0), sourceFactor_32x4);
1272  const float32x4_t source2_B_f_32x4 = vmulq_f32(vld1q_f32(source2 + 4), sourceFactor_32x4);
1273  const float32x4_t source2_C_f_32x4 = vmulq_f32(vld1q_f32(source2 + 8), sourceFactor_32x4);
1274  const float32x4_t source2_D_f_32x4 = vmulq_f32(vld1q_f32(source2 + 12), sourceFactor_32x4);
1275 
1276  target_8x16x3.val[0] = NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1277  target_8x16x3.val[1] = NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1278  target_8x16x3.val[2] = NEON::cast16ElementsNEON(source2_A_f_32x4, source2_B_f_32x4, source2_C_f_32x4, source2_D_f_32x4);
1279 
1280  vst3q_u8(target, target_8x16x3);
1281 
1282  source0 += tBlockSize;
1283  source1 += tBlockSize;
1284  source2 += tBlockSize;
1285 
1286  target += tBlockSize * tChannels;
1287  }
1288 
1289  for (unsigned int n = 0u; n < remaining; ++n)
1290  {
1291  ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1292  ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1293  ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
1294 
1295  target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1296  target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1297  target[n * tChannels + 2u] = uint8_t(source2[n] * sourceFactor);
1298  }
1299 
1300  source0 += remaining + sourceFrame0PaddingElements;
1301  source1 += remaining + sourceFrame1PaddingElements;
1302  source2 += remaining + sourceFrame2PaddingElements;
1303  target += remaining * tChannels + targetFramePaddingElements;
1304  }
1305  }
1306 }
1307 
1308 template <>
1309 void AdvancedFrameChannels::zipChannelsOnlySourceFactorNEON<float, uint8_t, 4u>(const float* const* const sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const float sourceFactor, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
1310 {
1311  ocean_assert(sourceFrames != nullptr);
1312  ocean_assert(targetFrame != nullptr);
1313 
1314  ocean_assert(width != 0u && height != 0u);
1315 
1316  constexpr unsigned int tChannels = 4u;
1317 
1318  bool allSourceFramesContinuous = true;
1319 
1320  if (sourceFramesPaddingElements != nullptr)
1321  {
1322  for (unsigned int n = 0u; n < tChannels; ++n)
1323  {
1324  if (sourceFramesPaddingElements[n] != 0u)
1325  {
1326  allSourceFramesContinuous = false;
1327  break;
1328  }
1329  }
1330  }
1331 
1332  const float32x4_t sourceFactor_32x4 = vdupq_n_f32(sourceFactor);
1333 
1334  const float* source0 = sourceFrames[0];
1335  const float* source1 = sourceFrames[1];
1336  const float* source2 = sourceFrames[2];
1337  const float* source3 = sourceFrames[3];
1338  uint8_t* target = targetFrame;
1339 
1340  constexpr unsigned int tBlockSize = 16u;
1341 
1342  uint8x16x4_t target_8x16x4;
1343 
1344  if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1345  {
1346  const unsigned int pixels = width * height;
1347  const unsigned int blocks = pixels / tBlockSize;
1348  const unsigned int remaining = pixels % tBlockSize;
1349 
1350  for (unsigned int n = 0u; n < blocks; ++n)
1351  {
1352  const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1353  const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1354  const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1355  const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1356 
1357  const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1358  const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1359  const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1360  const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1361 
1362  const float32x4_t source2_A_f_32x4 = vmulq_f32(vld1q_f32(source2 + 0), sourceFactor_32x4);
1363  const float32x4_t source2_B_f_32x4 = vmulq_f32(vld1q_f32(source2 + 4), sourceFactor_32x4);
1364  const float32x4_t source2_C_f_32x4 = vmulq_f32(vld1q_f32(source2 + 8), sourceFactor_32x4);
1365  const float32x4_t source2_D_f_32x4 = vmulq_f32(vld1q_f32(source2 + 12), sourceFactor_32x4);
1366 
1367  const float32x4_t source3_A_f_32x4 = vmulq_f32(vld1q_f32(source3 + 0), sourceFactor_32x4);
1368  const float32x4_t source3_B_f_32x4 = vmulq_f32(vld1q_f32(source3 + 4), sourceFactor_32x4);
1369  const float32x4_t source3_C_f_32x4 = vmulq_f32(vld1q_f32(source3 + 8), sourceFactor_32x4);
1370  const float32x4_t source3_D_f_32x4 = vmulq_f32(vld1q_f32(source3 + 12), sourceFactor_32x4);
1371 
1372  target_8x16x4.val[0] = NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1373  target_8x16x4.val[1] = NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1374  target_8x16x4.val[2] = NEON::cast16ElementsNEON(source2_A_f_32x4, source2_B_f_32x4, source2_C_f_32x4, source2_D_f_32x4);
1375  target_8x16x4.val[3] = NEON::cast16ElementsNEON(source3_A_f_32x4, source3_B_f_32x4, source3_C_f_32x4, source3_D_f_32x4);
1376 
1377  vst4q_u8(target, target_8x16x4);
1378 
1379  source0 += tBlockSize;
1380  source1 += tBlockSize;
1381  source2 += tBlockSize;
1382  source3 += tBlockSize;
1383 
1384  target += tBlockSize * tChannels;
1385  }
1386 
1387  for (unsigned int n = 0u; n < remaining; ++n)
1388  {
1389  ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1390  ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1391  ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
1392  ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
1393 
1394  target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1395  target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1396  target[n * tChannels + 2u] = uint8_t(source2[n] * sourceFactor);
1397  target[n * tChannels + 3u] = uint8_t(source3[n] * sourceFactor);
1398  }
1399  }
1400  else
1401  {
1402  const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
1403  const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
1404  const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
1405  const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[3];
1406 
1407  const unsigned int blocks = width / tBlockSize;
1408  const unsigned int remaining = width % tBlockSize;
1409 
1410  for (unsigned int y = 0u; y < height; ++y)
1411  {
1412  for (unsigned int n = 0u; n < blocks; ++n)
1413  {
1414  const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1415  const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1416  const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1417  const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1418 
1419  const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1420  const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1421  const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1422  const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1423 
1424  const float32x4_t source2_A_f_32x4 = vmulq_f32(vld1q_f32(source2 + 0), sourceFactor_32x4);
1425  const float32x4_t source2_B_f_32x4 = vmulq_f32(vld1q_f32(source2 + 4), sourceFactor_32x4);
1426  const float32x4_t source2_C_f_32x4 = vmulq_f32(vld1q_f32(source2 + 8), sourceFactor_32x4);
1427  const float32x4_t source2_D_f_32x4 = vmulq_f32(vld1q_f32(source2 + 12), sourceFactor_32x4);
1428 
1429  const float32x4_t source3_A_f_32x4 = vmulq_f32(vld1q_f32(source3 + 0), sourceFactor_32x4);
1430  const float32x4_t source3_B_f_32x4 = vmulq_f32(vld1q_f32(source3 + 4), sourceFactor_32x4);
1431  const float32x4_t source3_C_f_32x4 = vmulq_f32(vld1q_f32(source3 + 8), sourceFactor_32x4);
1432  const float32x4_t source3_D_f_32x4 = vmulq_f32(vld1q_f32(source3 + 12), sourceFactor_32x4);
1433 
1434  target_8x16x4.val[0] = NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1435  target_8x16x4.val[1] = NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1436  target_8x16x4.val[2] = NEON::cast16ElementsNEON(source2_A_f_32x4, source2_B_f_32x4, source2_C_f_32x4, source2_D_f_32x4);
1437  target_8x16x4.val[3] = NEON::cast16ElementsNEON(source3_A_f_32x4, source3_B_f_32x4, source3_C_f_32x4, source3_D_f_32x4);
1438 
1439  vst4q_u8(target, target_8x16x4);
1440 
1441  source0 += tBlockSize;
1442  source1 += tBlockSize;
1443  source2 += tBlockSize;
1444  source3 += tBlockSize;
1445 
1446  target += tBlockSize * tChannels;
1447  }
1448 
1449  for (unsigned int n = 0u; n < remaining; ++n)
1450  {
1451  ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1452  ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1453  ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
1454  ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
1455 
1456  target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1457  target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1458  target[n * tChannels + 2u] = uint8_t(source2[n] * sourceFactor);
1459  target[n * tChannels + 3u] = uint8_t(source3[n] * sourceFactor);
1460  }
1461 
1462  source0 += remaining + sourceFrame0PaddingElements;
1463  source1 += remaining + sourceFrame1PaddingElements;
1464  source2 += remaining + sourceFrame2PaddingElements;
1465  source3 += remaining + sourceFrame3PaddingElements;
1466  target += remaining * tChannels + targetFramePaddingElements;
1467  }
1468  }
1469 }
1470 
1471 #endif // OCEAN_HARDWARE_NEON_VERSION
1472 
1473 }
1474 
1475 }
1476 
1477 }
1478 
1479 #endif // META_OCEAN_CV_ADVANCED_ADVANCED_FRAME_CHANNLES_H
This class implements advanced frame channel conversion, transformation and extraction functions.
Definition: AdvancedFrameChannels.h:30
static void zipChannels(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition: AdvancedFrameChannels.h:378
static void zipChannelsRuntime(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition: AdvancedFrameChannels.h:571
static void zipChannelsOnlySourceFactorNEON(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const TSource sourceFactor, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
static void separateTo1ChannelOnlyTargetFactorNEON(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static void separateTo1ChannelRuntime(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition: AdvancedFrameChannels.h:495
static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
Definition of a constant to specify that the number of channels is not known at compile time but at runtime only.
Definition: AdvancedFrameChannels.h:36
static void separateTo1Channel(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
Definition: AdvancedFrameChannels.h:254
static OCEAN_FORCE_INLINE uint8x16_t cast16ElementsNEON(const float32x4_t &sourceA_f_32x4, const float32x4_t &sourceB_f_32x4, const float32x4_t &sourceC_f_32x4, const float32x4_t &sourceD_f_32x4)
Casts 16 float elements to 16 uint8_t elements.
Definition: NEON.h:1208
std::vector< Index32 > Indices32
Definition of a vector holding 32 bit index values.
Definition: Base.h:96
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15