8 #ifndef META_OCEAN_CV_ADVANCED_ADVANCED_FRAME_CHANNLES_H
9 #define META_OCEAN_CV_ADVANCED_ADVANCED_FRAME_CHANNLES_H
/// Sentinel value for the `tChannels` template parameter: it is the default template argument of
/// separateTo1Channel() and zipChannels(), and the initializer-list overloads pass it explicitly
/// in their last dispatch call (after the explicit 2, 3 and 4 channel cases).
36 static constexpr
unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME = 0u;
/**
 * Separates a frame with interleaved (zipped) channels into individual 1-channel frames while scaling the elements.
 * In the generic code path of the definition every target element is computed as
 * `TTarget(sourceElement * sourceFactor) * targetFactor`.
 * @param sourceFrame The interleaved source frame, must not be nullptr (asserted in the definition)
 * @param targetFrames The pointers to the 1-channel target frames, one pointer per channel, must not be nullptr
 * @param width The width of the frames in pixels, must not be 0
 * @param height The height of the frames in pixels, must not be 0
 * @param channels The number of channels; in the visible definition this value is only forwarded to separateTo1ChannelRuntime(), all other loops use `tChannels`
 * @param sourceFactor The factor applied to each source element before the conversion to TTarget, must not be 0
 * @param targetFactor The factor applied after the conversion to TTarget, must not be 0; the definition asserts that sourceFactor and targetFactor are not both 1
 * @param sourceFramePaddingElements The number of padding elements at the end of each source row (source stride = width * tChannels + padding)
 * @param targetFramesPaddingElements The padding elements at the end of each row of the individual target frames, one value per channel; nullptr if no target frame has padding (target stride = width)
 * @tparam TSource The element type of the source frame
 * @tparam TTarget The element type of the target frames
 * @tparam tChannels The number of channels if known at compile time, otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
 */
72 template <
typename TSource,
typename TTarget,
unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
73 static void separateTo1Channel(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const TSource sourceFactor,
const TTarget targetFactor,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements);
/**
 * Separates a frame with interleaved channels into the 1-channel frames given as an initializer list.
 * The number of channels is `targetFrames.size()`; the definition forwards to the pointer-based
 * separateTo1Channel() with `tChannels` 2, 3 or 4 for these sizes, and with
 * CHANNELS_NOT_KNOWN_AT_COMPILE_TIME in its last call.
 * @param sourceFrame The interleaved source frame
 * @param targetFrames The 1-channel target frames, at least one (asserted in the definition)
 * @param width The width of the frames in pixels
 * @param height The height of the frames in pixels
 * @param sourceFactor The factor applied to each source element before the conversion to TTarget, must not be 0
 * @param targetFactor The factor applied after the conversion to TTarget, must not be 0; both factors must not be 1 at the same time
 * @param sourceFramePaddingElements The number of padding elements at the end of each source row
 * @param targetFramesPaddingElements The padding elements of the target frames; either empty (forwarded as nullptr) or one value per target frame
 * @tparam TSource The element type of the source frame
 * @tparam TTarget The element type of the target frames
 */
107 template <
typename TSource,
typename TTarget>
108 static void separateTo1Channel(
const TSource*
const sourceFrame,
const std::initializer_list<TTarget*>& targetFrames,
const unsigned int width,
const unsigned int height,
const TSource sourceFactor,
const TTarget targetFactor,
const unsigned int sourceFramePaddingElements,
const std::initializer_list<const unsigned int>& targetFramesPaddingElements);
/**
 * Zips (interleaves) individual 1-channel frames into one frame while scaling the elements.
 * In the generic code path of the definition every target element is computed as
 * `TTarget(sourceElement * sourceFactor) * targetFactor`.
 * @param sourceFrames The pointers to the 1-channel source frames, one pointer per channel, must not be nullptr (asserted in the definition)
 * @param targetFrame The interleaved target frame, must not be nullptr
 * @param width The width of the frames in pixels, must not be 0
 * @param height The height of the frames in pixels, must not be 0
 * @param channels The number of channels; in the visible definition this value is only forwarded to zipChannelsRuntime(), all other loops use `tChannels`
 * @param sourceFactor The factor applied to each source element before the conversion to TTarget, must not be 0
 * @param targetFactor The factor applied after the conversion to TTarget, must not be 0; the definition asserts that sourceFactor and targetFactor are not both 1
 * @param sourceFramesPaddingElements The padding elements at the end of each row of the individual source frames, one value per channel; nullptr if no source frame has padding (source stride = width)
 * @param targetFramePaddingElements The number of padding elements at the end of each target row (target stride = width * tChannels + padding)
 * @tparam TSource The element type of the source frames
 * @tparam TTarget The element type of the target frame
 * @tparam tChannels The number of channels if known at compile time, otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
 */
142 template <
typename TSource,
typename TTarget,
unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
143 static void zipChannels(
const TSource*
const*
const sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const TSource sourceFactor,
const TTarget targetFactor,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
/**
 * Zips (interleaves) the 1-channel frames given as an initializer list into one frame.
 * The number of channels is `sourceFrames.size()`; the definition forwards to the pointer-based
 * zipChannels() with `tChannels` 2, 3 or 4 for these sizes, and with
 * CHANNELS_NOT_KNOWN_AT_COMPILE_TIME in its last call.
 * @param sourceFrames The 1-channel source frames, at least one (asserted in the definition)
 * @param targetFrame The interleaved target frame
 * @param width The width of the frames in pixels
 * @param height The height of the frames in pixels
 * @param sourceFactor The factor applied to each source element before the conversion to TTarget, must not be 0
 * @param targetFactor The factor applied after the conversion to TTarget, must not be 0; both factors must not be 1 at the same time
 * @param sourceFramesPaddingElements The padding elements of the source frames; either empty (forwarded as nullptr) or one value per source frame
 * @param targetFramePaddingElements The number of padding elements at the end of each target row
 * @tparam TSource The element type of the source frames
 * @tparam TTarget The element type of the target frame
 */
177 template <
typename TSource,
typename TTarget>
178 static void zipChannels(
const std::initializer_list<const TSource*>& sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const TSource sourceFactor,
const TTarget targetFactor,
const std::initializer_list<unsigned int>& sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
/**
 * Separates a frame with interleaved channels into 1-channel frames for a channel count which is only known at runtime.
 * Every target element is computed as `TTarget(sourceElement * sourceFactor) * targetFactor`.
 * @param sourceFrame The interleaved source frame, must not be nullptr (asserted in the definition)
 * @param targetFrames The pointers to the 1-channel target frames, `channels` pointers, must not be nullptr
 * @param width The width of the frames in pixels, must not be 0
 * @param height The height of the frames in pixels, must not be 0
 * @param channels The number of channels, must not be 0
 * @param sourceFactor The factor applied to each source element before the conversion to TTarget, must not be 0
 * @param targetFactor The factor applied after the conversion to TTarget, must not be 0; both factors must not be 1 at the same time
 * @param sourceFramePaddingElements The number of padding elements at the end of each source row (source stride = width * channels + padding)
 * @param targetFramesPaddingElements The padding elements of the target frames, one value per channel; nullptr if no target frame has padding
 * @tparam TSource The element type of the source frame
 * @tparam TTarget The element type of the target frames
 */
196 template <
typename TSource,
typename TTarget>
197 static void separateTo1ChannelRuntime(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const TSource sourceFactor,
const TTarget targetFactor,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements);
/**
 * Zips (interleaves) 1-channel frames into one frame for a channel count which is only known at runtime.
 * Every target element is computed as `TTarget(sourceElement * sourceFactor) * targetFactor`.
 * @param sourceFrames The pointers to the 1-channel source frames, `channels` pointers, must not be nullptr (asserted in the definition)
 * @param targetFrame The interleaved target frame, must not be nullptr
 * @param width The width of the frames in pixels, must not be 0
 * @param height The height of the frames in pixels, must not be 0
 * @param channels The number of channels, must not be 0
 * @param sourceFactor The factor applied to each source element before the conversion to TTarget, must not be 0
 * @param targetFactor The factor applied after the conversion to TTarget, must not be 0; both factors must not be 1 at the same time
 * @param sourceFramesPaddingElements The padding elements of the source frames, one value per channel; nullptr if no source frame has padding
 * @param targetFramePaddingElements The number of padding elements at the end of each target row (target stride = width * channels + padding)
 * @tparam TSource The element type of the source frames
 * @tparam TTarget The element type of the target frame
 */
213 template <
typename TSource,
typename TTarget>
214 static void zipChannelsRuntime(
const TSource*
const*
const sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const TSource sourceFactor,
const TTarget targetFactor,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
216 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
/**
 * NEON-based separation of an interleaved frame into 1-channel frames which applies a target factor only.
 * This declaration follows the `#if` on OCEAN_HARDWARE_NEON_VERSION >= 10; the file provides
 * specializations for <uint8_t, float, 2u>, <uint8_t, float, 3u> and <uint8_t, float, 4u>.
 * There is no source factor: separateTo1Channel() calls this function only if its sourceFactor is 1.
 * @param sourceFrame The interleaved source frame, must not be nullptr (asserted in the specializations)
 * @param targetFrames The pointers to the 1-channel target frames, `tChannels` pointers, must not be nullptr
 * @param width The width of the frames in pixels, must not be 0
 * @param height The height of the frames in pixels, must not be 0
 * @param targetFactor The factor multiplied with every element after the conversion to TTarget
 * @param sourceFramePaddingElements The number of padding elements at the end of each source row
 * @param targetFramesPaddingElements The padding elements of the target frames, one value per channel; nullptr is treated as no padding
 * @tparam TSource The element type of the source frame
 * @tparam TTarget The element type of the target frames
 * @tparam tChannels The number of channels
 */
231 template <
typename TSource,
typename TTarget,
unsigned int tChannels>
232 static void separateTo1ChannelOnlyTargetFactorNEON(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const TTarget targetFactor,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements);
/**
 * NEON-based zipping of 1-channel frames into one interleaved frame which applies a source factor only.
 * There is no target factor: zipChannels() calls this function only if its targetFactor is 1.
 * NOTE(review): only the beginning of the <float, uint8_t, 2u> specialization is visible in this part of the file;
 * the zipChannels() dispatch implies specializations for 2 to 4 channels — confirm.
 * @param sourceFrames The pointers to the 1-channel source frames, `tChannels` pointers, must not be nullptr (asserted in the visible specialization)
 * @param targetFrame The interleaved target frame, must not be nullptr
 * @param width The width of the frames in pixels, must not be 0
 * @param height The height of the frames in pixels, must not be 0
 * @param sourceFactor The factor multiplied with every source element
 * @param sourceFramesPaddingElements The padding elements of the source frames, one value per channel; nullptr is treated as no padding
 * @param targetFramePaddingElements The number of padding elements at the end of each target row
 * @tparam TSource The element type of the source frames
 * @tparam TTarget The element type of the target frame
 * @tparam tChannels The number of channels
 */
247 template <
typename TSource,
typename TTarget,
unsigned int tChannels>
248 static void zipChannelsOnlySourceFactorNEON(
const TSource*
const*
const sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const TSource sourceFactor,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
/**
 * Definition of the pointer-based separateTo1Channel().
 * Splits an interleaved frame with `tChannels` channels into `tChannels` 1-channel frames and computes every
 * target element as `TTarget(sourceElement * sourceFactor) * targetFactor`.
 * Three generic code paths exist: both frames continuous, only the source frame padded, and target frames
 * with explicit padding values.
 * NOTE(review): the brace, `else`, `return` and `#endif` lines as well as the conditions guarding the two
 * forwarding calls below are not visible in this view of the file.
 */
253 template <
typename TSource,
typename TTarget,
unsigned int tChannels>
254 void AdvancedFrameChannels::separateTo1Channel(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const TSource sourceFactor,
const TTarget targetFactor,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
256 ocean_assert(sourceFrame !=
nullptr);
257 ocean_assert(targetFrames !=
nullptr);
259 ocean_assert(width != 0u && height != 0u);
// neither factor may be zero, and at least one factor must differ from 1
261 assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
262 assert(sourceFactor != TSource(1) || targetFactor != TTarget(1));
// forwards to the runtime implementation, the only use of 'channels' in this function
// NOTE(review): the guarding condition is not visible here - presumably tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME, confirm
268 separateTo1ChannelRuntime<TSource, TTarget>(sourceFrame, targetFrames, width, height, channels, sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements);
// NEON fast path, compiled only if OCEAN_HARDWARE_NEON_VERSION is at least 10
272 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// only for uint8_t sources, float targets, and 2 to 4 channels
274 if constexpr (std::is_same<TSource, uint8_t>::value && std::is_same<TTarget, float>::value && tChannels >= 2u && tChannels <= 4u)
// the NEON implementation takes no source factor, so it is used only if the source factor is 1
276 if (sourceFactor == uint8_t(1))
278 separateTo1ChannelOnlyTargetFactorNEON<TSource, TTarget, tChannels>(sourceFrame, targetFrames, width, height, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements);
// every individual target frame pointer must be valid
287 for (
unsigned int c = 0u; c < tChannels; ++c)
289 ocean_assert(targetFrames[c] !=
nullptr);
// path 1: no padding in the source frame and no padding information for the target frames,
// all pixels are handled with one linear index n
293 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements ==
nullptr)
295 for (
unsigned int n = 0u; n < width * height; ++n)
297 for (
unsigned int c = 0u; c < tChannels; ++c)
299 targetFrames[c][n] = TTarget(sourceFrame[n * tChannels + c] * sourceFactor) * targetFactor;
// path 2: only the source frame has padding, the target frames are addressed with a stride of 'width'
303 else if (targetFramesPaddingElements ==
nullptr)
305 ocean_assert(sourceFramePaddingElements != 0u);
307 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
309 for (
unsigned int y = 0u; y < height; ++y)
311 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
313 const unsigned int targetRowOffset = y * width;
315 for (
unsigned int x = 0u; x < width; ++x)
317 for (
unsigned int c = 0u; c < tChannels; ++c)
319 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * tChannels + c) * sourceFactor) * targetFactor;
// path 3: padding values are given for the target frames, each target frame gets its own stride
326 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
328 Indices32 targetFrameStrideElements(tChannels);
330 for (
unsigned int c = 0u; c < tChannels; ++c)
332 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
335 for (
unsigned int y = 0u; y < height; ++y)
337 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
339 for (
unsigned int x = 0u; x < width; ++x)
341 for (
unsigned int c = 0u; c < tChannels; ++c)
343 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * tChannels + c) * sourceFactor) * targetFactor;
350 template <
typename TSource,
typename TTarget>
351 void AdvancedFrameChannels::separateTo1Channel(
const TSource*
const sourceFrame,
const std::initializer_list<TTarget*>& targetFrames,
const unsigned int width,
const unsigned int height,
const TSource sourceFactor,
const TTarget targetFactor,
const unsigned int sourceFramePaddingElements,
const std::initializer_list<const unsigned int>& targetFramesPaddingElements)
353 ocean_assert(targetFrames.size() >= 1);
354 ocean_assert(targetFramesPaddingElements.size() == 0 || targetFrames.size() == targetFramesPaddingElements.size());
356 assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
357 assert(sourceFactor != TSource(1) || targetFactor != TTarget(1));
359 if (targetFrames.size() == 2)
361 separateTo1Channel<TSource, TTarget, 2u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
363 else if (targetFrames.size() == 3)
365 separateTo1Channel<TSource, TTarget, 3u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
367 else if (targetFrames.size() == 4)
369 separateTo1Channel<TSource, TTarget, 4u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
373 separateTo1Channel<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
/**
 * Definition of the pointer-based zipChannels().
 * Interleaves `tChannels` 1-channel frames into one frame and computes every target element as
 * `TTarget(sourceElement * sourceFactor) * targetFactor`.
 * Two generic code paths exist: all frames continuous, and at least one frame with padding.
 * NOTE(review): the brace, `else`, `return`, `break` and `#endif` lines as well as the conditions guarding
 * the two forwarding calls below are not visible in this view of the file.
 */
377 template <
typename TSource,
typename TTarget,
unsigned int tChannels>
378 void AdvancedFrameChannels::zipChannels(
const TSource*
const* sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const TSource sourceFactor,
const TTarget targetFactor,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
380 ocean_assert(sourceFrames !=
nullptr);
381 ocean_assert(targetFrame !=
nullptr);
383 ocean_assert(width != 0u && height != 0u);
// neither factor may be zero, and at least one factor must differ from 1
385 assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
386 assert(sourceFactor != TSource(1) || targetFactor != TTarget(1));
// forwards to the runtime implementation, the only use of 'channels' in this function
// NOTE(review): the guarding condition is not visible here - presumably tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME, confirm
392 zipChannelsRuntime<TSource, TTarget>(sourceFrames, targetFrame, width, height, channels, sourceFactor, targetFactor, sourceFramesPaddingElements, targetFramePaddingElements);
// NEON fast path, compiled only if OCEAN_HARDWARE_NEON_VERSION is at least 10
396 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// only for float sources, uint8_t targets, and 2 to 4 channels
398 if constexpr (std::is_same<TSource, float>::value && std::is_same<TTarget, uint8_t>::value && tChannels >= 2u && tChannels <= 4u)
// the NEON implementation takes no target factor, so it is used only if the target factor is 1
400 if (targetFactor == uint8_t(1))
402 zipChannelsOnlySourceFactorNEON<TSource, TTarget, tChannels>(sourceFrames, targetFrame, width, height, sourceFactor, sourceFramesPaddingElements, targetFramePaddingElements);
// the source frames are continuous if no padding values are given or if all given values are zero
410 bool allSourceFramesContinuous =
true;
412 if (sourceFramesPaddingElements !=
nullptr)
414 for (
unsigned int n = 0u; n < tChannels; ++n)
416 if (sourceFramesPaddingElements[n] != 0u)
418 allSourceFramesContinuous =
false;
// path 1: no padding anywhere, all pixels are handled with one linear index n
424 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
426 for (
unsigned int n = 0u; n < width * height; ++n)
428 for (
unsigned int c = 0u; c < tChannels; ++c)
430 targetFrame[n * tChannels + c] = TTarget(sourceFrames[c][n] * sourceFactor) * targetFactor;
// path 2: at least one frame has padding, rows are addressed with explicit strides
436 const unsigned int targetFrameStrideElements = width * tChannels + targetFramePaddingElements;
438 Indices32 sourceFrameStrideElements(tChannels);
// a missing padding array means a stride of 'width' for every source frame
440 for (
unsigned int c = 0u; c < tChannels; ++c)
442 if (sourceFramesPaddingElements ==
nullptr)
444 sourceFrameStrideElements[c] = width;
448 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
452 for (
unsigned int y = 0u; y < height; ++y)
454 TTarget*
const targetRow = targetFrame + y * targetFrameStrideElements;
456 for (
unsigned int x = 0u; x < width; ++x)
458 for (
unsigned int c = 0u; c < tChannels; ++c)
460 *(targetRow + x * tChannels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x) * sourceFactor) * targetFactor;
467 template <
typename TSource,
typename TTarget>
468 void AdvancedFrameChannels::zipChannels(
const std::initializer_list<const TSource*>& sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const TSource sourceFactor,
const TTarget targetFactor,
const std::initializer_list<unsigned int>& sourceFramePaddingElements,
const unsigned int targetFramePaddingElements)
470 ocean_assert(sourceFrames.size() >= 1);
471 ocean_assert(sourceFramePaddingElements.size() == 0 || sourceFrames.size() == sourceFramePaddingElements.size());
473 assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
474 assert(sourceFactor != TSource(1) || targetFactor != TTarget(1));
476 if (sourceFrames.size() == 2)
478 zipChannels<TSource, TTarget, 2u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
480 else if (sourceFrames.size() == 3)
482 zipChannels<TSource, TTarget, 3u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
484 else if (sourceFrames.size() == 4)
486 zipChannels<TSource, TTarget, 4u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
490 zipChannels<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFactor, targetFactor, sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
494 template <
typename TSource,
typename TTarget>
495 void AdvancedFrameChannels::separateTo1ChannelRuntime(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const TSource sourceFactor,
const TTarget targetFactor,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
497 ocean_assert(sourceFrame !=
nullptr);
498 ocean_assert(targetFrames !=
nullptr);
500 ocean_assert(width != 0u && height != 0u);
501 ocean_assert(channels != 0u);
503 assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
504 assert(sourceFactor != TSource(1) || targetFactor != TTarget(1));
507 for (
unsigned int c = 0u; c < channels; ++c)
509 ocean_assert(targetFrames[c] !=
nullptr);
513 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements ==
nullptr)
515 for (
unsigned int n = 0u; n < width * height; ++n)
517 for (
unsigned int c = 0u; c < channels; ++c)
519 targetFrames[c][n] = TTarget(sourceFrame[n * channels + c] * sourceFactor) * targetFactor;
523 else if (targetFramesPaddingElements ==
nullptr)
525 ocean_assert(sourceFramePaddingElements != 0u);
527 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
529 for (
unsigned int y = 0u; y < height; ++y)
531 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
533 const unsigned int targetRowOffset = y * width;
535 for (
unsigned int x = 0u; x < width; ++x)
537 for (
unsigned int c = 0u; c < channels; ++c)
539 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * channels + c) * sourceFactor) * targetFactor;
546 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
548 Indices32 targetFrameStrideElements(channels);
550 for (
unsigned int c = 0u; c < channels; ++c)
552 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
555 for (
unsigned int y = 0u; y < height; ++y)
557 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
559 for (
unsigned int x = 0u; x < width; ++x)
561 for (
unsigned int c = 0u; c < channels; ++c)
563 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * channels + c) * sourceFactor) * targetFactor;
570 template <
typename TSource,
typename TTarget>
571 void AdvancedFrameChannels::zipChannelsRuntime(
const TSource*
const* sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const TSource sourceFactor,
const TTarget targetFactor,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
573 ocean_assert(sourceFrames !=
nullptr);
574 ocean_assert(targetFrame !=
nullptr);
576 ocean_assert(width != 0u && height != 0u);
577 ocean_assert(channels != 0u);
579 assert(sourceFactor != TSource(0) && targetFactor != TTarget(0));
580 assert(sourceFactor != TSource(1) || targetFactor != TTarget(1));
582 bool allSourceFramesContinuous =
true;
584 if (sourceFramesPaddingElements !=
nullptr)
586 for (
unsigned int n = 0u; n < channels; ++n)
588 if (sourceFramesPaddingElements[n] != 0u)
590 allSourceFramesContinuous =
false;
596 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
598 for (
unsigned int n = 0u; n < width * height; ++n)
600 for (
unsigned int c = 0u; c < channels; ++c)
602 targetFrame[n * channels + c] = TTarget(sourceFrames[c][n] * sourceFactor) * targetFactor;
608 const unsigned int targetFrameStrideElements = width * channels + targetFramePaddingElements;
610 Indices32 sourceFrameStrideElements(channels);
612 for (
unsigned int c = 0u; c < channels; ++c)
614 if (sourceFramesPaddingElements ==
nullptr)
616 sourceFrameStrideElements[c] = width;
620 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
624 for (
unsigned int y = 0u; y < height; ++y)
626 TTarget*
const targetRow = targetFrame + y * targetFrameStrideElements;
628 for (
unsigned int x = 0u; x < width; ++x)
630 for (
unsigned int c = 0u; c < channels; ++c)
632 *(targetRow + x * channels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x) * sourceFactor) * targetFactor;
639 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
/**
 * Specialization of separateTo1ChannelOnlyTargetFactorNEON() for uint8_t sources, float targets and 2 channels.
 * Splits an interleaved 2-channel uint8_t frame into two float frames and multiplies every element with
 * `targetFactor`; 16 pixels are handled per NEON iteration, the remaining pixels with scalar code.
 * NOTE(review): the `template <>` introducer, the brace/`else` lines and the lines defining
 * `sourceA_f_32x4x4` / `sourceB_f_32x4x4` (the uint8 -> float conversion of the loaded vectors) are not
 * visible in this view of the file.
 */
642 inline void AdvancedFrameChannels::separateTo1ChannelOnlyTargetFactorNEON<uint8_t, float, 2u>(
const uint8_t*
const sourceFrame,
float*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const float targetFactor,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
644 ocean_assert(sourceFrame !=
nullptr);
645 ocean_assert(targetFrames !=
nullptr);
647 ocean_assert(width != 0u && height != 0u);
649 ocean_assert(targetFactor != 0.0f);
651 constexpr
unsigned int tChannels = 2u;
// the target frames are continuous if no padding values are given or if all given values are zero
653 bool allTargetFramesContinuous =
true;
655 if (targetFramesPaddingElements !=
nullptr)
657 for (
unsigned int n = 0u; n < tChannels; ++n)
659 if (targetFramesPaddingElements[n] != 0u)
661 allTargetFramesContinuous =
false;
// the target factor replicated into all four float lanes
667 const float32x4_t targetFactor_32x4 = vdupq_n_f32(targetFactor);
// running pointers, advanced while the frames are processed
669 const uint8_t* source = sourceFrame;
670 float* target0 = targetFrames[0];
671 float* target1 = targetFrames[1];
// number of pixels handled per NEON iteration
673 constexpr
unsigned int tBlockSize = 16u;
675 uint8x16x2_t source_u_8x16x2;
// path 1: no padding anywhere, the entire frame is handled as one run of width * height pixels
677 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
679 const unsigned int pixels = width * height;
680 const unsigned int blocks = pixels / tBlockSize;
681 const unsigned int remaining = pixels % tBlockSize;
683 for (
unsigned int n = 0u; n < blocks; ++n)
// de-interleaving load of 16 pixels: val[0] receives channel 0, val[1] receives channel 1
685 source_u_8x16x2 = vld2q_u8(source);
// 16 scaled float values are stored per channel, four at a time
690 vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
691 vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
692 vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
693 vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
695 vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
696 vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
697 vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
698 vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
700 source += tBlockSize * tChannels;
702 target0 += tBlockSize;
703 target1 += tBlockSize;
// scalar handling of the pixels which do not fill an entire block
706 for (
unsigned int n = 0u; n < remaining; ++n)
708 target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
709 target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
// path 2: at least one frame has padding, each row is handled individually
714 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
715 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
717 const unsigned int blocks = width / tBlockSize;
718 const unsigned int remaining = width % tBlockSize;
720 for (
unsigned int y = 0u; y < height; ++y)
722 for (
unsigned int n = 0u; n < blocks; ++n)
// de-interleaving load of 16 pixels: val[0] receives channel 0, val[1] receives channel 1
724 source_u_8x16x2 = vld2q_u8(source);
729 vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
730 vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
731 vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
732 vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
734 vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
735 vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
736 vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
737 vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
739 source += tBlockSize * tChannels;
741 target0 += tBlockSize;
742 target1 += tBlockSize;
// scalar handling of the last pixels of the row
745 for (
unsigned int n = 0u; n < remaining; ++n)
747 target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
748 target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
// skips the scalar pixels and the padding so that all pointers are at the start of the next row
751 source += remaining * tChannels + sourceFramePaddingElements;
752 target0 += remaining + targetFrame0PaddingElements;
753 target1 += remaining + targetFrame1PaddingElements;
/**
 * Specialization of separateTo1ChannelOnlyTargetFactorNEON() for uint8_t sources, float targets and 3 channels.
 * Splits an interleaved 3-channel uint8_t frame into three float frames and multiplies every element with
 * `targetFactor`; 16 pixels are handled per NEON iteration, the remaining pixels with scalar code.
 * NOTE(review): the `template <>` introducer, the brace/`else` lines and the lines defining
 * `sourceA_f_32x4x4` / `sourceB_f_32x4x4` / `sourceC_f_32x4x4` (the uint8 -> float conversion of the loaded
 * vectors) are not visible in this view of the file.
 */
759 inline void AdvancedFrameChannels::separateTo1ChannelOnlyTargetFactorNEON<uint8_t, float, 3u>(
const uint8_t*
const sourceFrame,
float*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const float targetFactor,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
761 ocean_assert(sourceFrame !=
nullptr);
762 ocean_assert(targetFrames !=
nullptr);
764 ocean_assert(width != 0u && height != 0u);
766 constexpr
unsigned int tChannels = 3u;
// the target frames are continuous if no padding values are given or if all given values are zero
768 bool allTargetFramesContinuous =
true;
770 if (targetFramesPaddingElements !=
nullptr)
772 for (
unsigned int n = 0u; n < tChannels; ++n)
774 if (targetFramesPaddingElements[n] != 0u)
776 allTargetFramesContinuous =
false;
// the target factor replicated into all four float lanes
782 const float32x4_t targetFactor_32x4 = vdupq_n_f32(targetFactor);
// running pointers, advanced while the frames are processed
784 const uint8_t* source = sourceFrame;
785 float* target0 = targetFrames[0];
786 float* target1 = targetFrames[1];
787 float* target2 = targetFrames[2];
// number of pixels handled per NEON iteration
789 constexpr
unsigned int tBlockSize = 16u;
791 uint8x16x3_t source_u_8x16x3;
// path 1: no padding anywhere, the entire frame is handled as one run of width * height pixels
793 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
795 const unsigned int pixels = width * height;
796 const unsigned int blocks = pixels / tBlockSize;
797 const unsigned int remaining = pixels % tBlockSize;
799 for (
unsigned int n = 0u; n < blocks; ++n)
// de-interleaving load of 16 pixels: val[0], val[1] and val[2] receive channel 0, 1 and 2
801 source_u_8x16x3 = vld3q_u8(source);
// 16 scaled float values are stored per channel, four at a time
807 vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
808 vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
809 vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
810 vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
812 vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
813 vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
814 vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
815 vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
817 vst1q_f32(target2 + 0, vmulq_f32(sourceC_f_32x4x4.val[0], targetFactor_32x4));
818 vst1q_f32(target2 + 4, vmulq_f32(sourceC_f_32x4x4.val[1], targetFactor_32x4));
819 vst1q_f32(target2 + 8, vmulq_f32(sourceC_f_32x4x4.val[2], targetFactor_32x4));
820 vst1q_f32(target2 + 12, vmulq_f32(sourceC_f_32x4x4.val[3], targetFactor_32x4));
822 source += tBlockSize * tChannels;
824 target0 += tBlockSize;
825 target1 += tBlockSize;
826 target2 += tBlockSize;
// scalar handling of the pixels which do not fill an entire block
829 for (
unsigned int n = 0u; n < remaining; ++n)
831 target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
832 target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
833 target2[n] = float(source[n * tChannels + 2u]) * targetFactor;
// path 2: at least one frame has padding, each row is handled individually
838 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
839 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
840 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[2];
842 const unsigned int blocks = width / tBlockSize;
843 const unsigned int remaining = width % tBlockSize;
845 for (
unsigned int y = 0u; y < height; ++y)
847 for (
unsigned int n = 0u; n < blocks; ++n)
// de-interleaving load of 16 pixels: val[0], val[1] and val[2] receive channel 0, 1 and 2
849 source_u_8x16x3 = vld3q_u8(source);
855 vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
856 vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
857 vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
858 vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
860 vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
861 vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
862 vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
863 vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
865 vst1q_f32(target2 + 0, vmulq_f32(sourceC_f_32x4x4.val[0], targetFactor_32x4));
866 vst1q_f32(target2 + 4, vmulq_f32(sourceC_f_32x4x4.val[1], targetFactor_32x4));
867 vst1q_f32(target2 + 8, vmulq_f32(sourceC_f_32x4x4.val[2], targetFactor_32x4));
868 vst1q_f32(target2 + 12, vmulq_f32(sourceC_f_32x4x4.val[3], targetFactor_32x4));
870 source += tBlockSize * tChannels;
872 target0 += tBlockSize;
873 target1 += tBlockSize;
874 target2 += tBlockSize;
// scalar handling of the last pixels of the row
877 for (
unsigned int n = 0u; n < remaining; ++n)
879 target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
880 target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
881 target2[n] = float(source[n * tChannels + 2u]) * targetFactor;
// skips the scalar pixels and the padding so that all pointers are at the start of the next row
884 source += remaining * tChannels + sourceFramePaddingElements;
885 target0 += remaining + targetFrame0PaddingElements;
886 target1 += remaining + targetFrame1PaddingElements;
887 target2 += remaining + targetFrame2PaddingElements;
/**
 * Specialization of separateTo1ChannelOnlyTargetFactorNEON() for uint8_t sources, float targets and 4 channels.
 * Splits an interleaved 4-channel uint8_t frame into four float frames and multiplies every element with
 * `targetFactor`; 16 pixels are handled per NEON iteration, the remaining pixels with scalar code.
 * NOTE(review): the `template <>` introducer, the brace/`else` lines and the lines defining
 * `sourceA_f_32x4x4` to `sourceD_f_32x4x4` (the uint8 -> float conversion of the loaded vectors) are not
 * visible in this view of the file.
 */
893 inline void AdvancedFrameChannels::separateTo1ChannelOnlyTargetFactorNEON<uint8_t, float, 4u>(
const uint8_t*
const sourceFrame,
float*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const float targetFactor,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
895 ocean_assert(sourceFrame !=
nullptr);
896 ocean_assert(targetFrames !=
nullptr);
898 ocean_assert(width != 0u && height != 0u);
900 constexpr
unsigned int tChannels = 4u;
// the target frames are continuous if no padding values are given or if all given values are zero
902 bool allTargetFramesContinuous =
true;
904 if (targetFramesPaddingElements !=
nullptr)
906 for (
unsigned int n = 0u; n < tChannels; ++n)
908 if (targetFramesPaddingElements[n] != 0u)
910 allTargetFramesContinuous =
false;
// the target factor replicated into all four float lanes
916 const float32x4_t targetFactor_32x4 = vdupq_n_f32(targetFactor);
// running pointers, advanced while the frames are processed
918 const uint8_t* source = sourceFrame;
919 float* target0 = targetFrames[0];
920 float* target1 = targetFrames[1];
921 float* target2 = targetFrames[2];
922 float* target3 = targetFrames[3];
// number of pixels handled per NEON iteration
924 constexpr
unsigned int tBlockSize = 16u;
926 uint8x16x4_t source_u_8x16x4;
// path 1: no padding anywhere, the entire frame is handled as one run of width * height pixels
928 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
930 const unsigned int pixels = width * height;
931 const unsigned int blocks = pixels / tBlockSize;
932 const unsigned int remaining = pixels % tBlockSize;
934 for (
unsigned int n = 0u; n < blocks; ++n)
// de-interleaving load of 16 pixels: val[0] to val[3] receive channel 0 to 3
936 source_u_8x16x4 = vld4q_u8(source);
// 16 scaled float values are stored per channel, four at a time
943 vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
944 vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
945 vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
946 vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
948 vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
949 vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
950 vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
951 vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
953 vst1q_f32(target2 + 0, vmulq_f32(sourceC_f_32x4x4.val[0], targetFactor_32x4));
954 vst1q_f32(target2 + 4, vmulq_f32(sourceC_f_32x4x4.val[1], targetFactor_32x4));
955 vst1q_f32(target2 + 8, vmulq_f32(sourceC_f_32x4x4.val[2], targetFactor_32x4));
956 vst1q_f32(target2 + 12, vmulq_f32(sourceC_f_32x4x4.val[3], targetFactor_32x4));
958 vst1q_f32(target3 + 0, vmulq_f32(sourceD_f_32x4x4.val[0], targetFactor_32x4));
959 vst1q_f32(target3 + 4, vmulq_f32(sourceD_f_32x4x4.val[1], targetFactor_32x4));
960 vst1q_f32(target3 + 8, vmulq_f32(sourceD_f_32x4x4.val[2], targetFactor_32x4));
961 vst1q_f32(target3 + 12, vmulq_f32(sourceD_f_32x4x4.val[3], targetFactor_32x4));
963 source += tBlockSize * tChannels;
965 target0 += tBlockSize;
966 target1 += tBlockSize;
967 target2 += tBlockSize;
968 target3 += tBlockSize;
// scalar handling of the pixels which do not fill an entire block
971 for (
unsigned int n = 0u; n < remaining; ++n)
973 target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
974 target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
975 target2[n] = float(source[n * tChannels + 2u]) * targetFactor;
976 target3[n] = float(source[n * tChannels + 3u]) * targetFactor;
// path 2: at least one frame has padding, each row is handled individually
981 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
982 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
983 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[2];
984 const unsigned int targetFrame3PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[3];
986 const unsigned int blocks = width / tBlockSize;
987 const unsigned int remaining = width % tBlockSize;
989 for (
unsigned int y = 0u; y < height; ++y)
991 for (
unsigned int n = 0u; n < blocks; ++n)
// de-interleaving load of 16 pixels: val[0] to val[3] receive channel 0 to 3
993 source_u_8x16x4 = vld4q_u8(source);
1000 vst1q_f32(target0 + 0, vmulq_f32(sourceA_f_32x4x4.val[0], targetFactor_32x4));
1001 vst1q_f32(target0 + 4, vmulq_f32(sourceA_f_32x4x4.val[1], targetFactor_32x4));
1002 vst1q_f32(target0 + 8, vmulq_f32(sourceA_f_32x4x4.val[2], targetFactor_32x4));
1003 vst1q_f32(target0 + 12, vmulq_f32(sourceA_f_32x4x4.val[3], targetFactor_32x4));
1005 vst1q_f32(target1 + 0, vmulq_f32(sourceB_f_32x4x4.val[0], targetFactor_32x4));
1006 vst1q_f32(target1 + 4, vmulq_f32(sourceB_f_32x4x4.val[1], targetFactor_32x4));
1007 vst1q_f32(target1 + 8, vmulq_f32(sourceB_f_32x4x4.val[2], targetFactor_32x4));
1008 vst1q_f32(target1 + 12, vmulq_f32(sourceB_f_32x4x4.val[3], targetFactor_32x4));
1010 vst1q_f32(target2 + 0, vmulq_f32(sourceC_f_32x4x4.val[0], targetFactor_32x4));
1011 vst1q_f32(target2 + 4, vmulq_f32(sourceC_f_32x4x4.val[1], targetFactor_32x4));
1012 vst1q_f32(target2 + 8, vmulq_f32(sourceC_f_32x4x4.val[2], targetFactor_32x4));
1013 vst1q_f32(target2 + 12, vmulq_f32(sourceC_f_32x4x4.val[3], targetFactor_32x4));
1015 vst1q_f32(target3 + 0, vmulq_f32(sourceD_f_32x4x4.val[0], targetFactor_32x4));
1016 vst1q_f32(target3 + 4, vmulq_f32(sourceD_f_32x4x4.val[1], targetFactor_32x4));
1017 vst1q_f32(target3 + 8, vmulq_f32(sourceD_f_32x4x4.val[2], targetFactor_32x4));
1018 vst1q_f32(target3 + 12, vmulq_f32(sourceD_f_32x4x4.val[3], targetFactor_32x4));
1020 source += tBlockSize * tChannels;
1022 target0 += tBlockSize;
1023 target1 += tBlockSize;
1024 target2 += tBlockSize;
1025 target3 += tBlockSize;
// scalar handling of the last pixels of the row
1028 for (
unsigned int n = 0u; n < remaining; ++n)
1030 target0[n] = float(source[n * tChannels + 0u]) * targetFactor;
1031 target1[n] = float(source[n * tChannels + 1u]) * targetFactor;
1032 target2[n] = float(source[n * tChannels + 2u]) * targetFactor;
1033 target3[n] = float(source[n * tChannels + 3u]) * targetFactor;
// skips the scalar pixels and the padding so that all pointers are at the start of the next row
1036 source += remaining * tChannels + sourceFramePaddingElements;
1037 target0 += remaining + targetFrame0PaddingElements;
1038 target1 += remaining + targetFrame1PaddingElements;
1039 target2 += remaining + targetFrame2PaddingElements;
1040 target3 += remaining + targetFrame3PaddingElements;
1046 void AdvancedFrameChannels::zipChannelsOnlySourceFactorNEON<float, uint8_t, 2u>(
const float*
const*
const sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const float sourceFactor,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
1048 ocean_assert(sourceFrames !=
nullptr);
1049 ocean_assert(targetFrame !=
nullptr);
1051 ocean_assert(width != 0u && height != 0u);
1053 constexpr
unsigned int tChannels = 2u;
1055 bool allSourceFramesContinuous =
true;
1057 if (sourceFramesPaddingElements !=
nullptr)
1059 for (
unsigned int n = 0u; n < tChannels; ++n)
1061 if (sourceFramesPaddingElements[n] != 0u)
1063 allSourceFramesContinuous =
false;
1069 const float32x4_t sourceFactor_32x4 = vdupq_n_f32(sourceFactor);
1071 const float* source0 = sourceFrames[0];
1072 const float* source1 = sourceFrames[1];
1073 uint8_t* target = targetFrame;
1075 constexpr
unsigned int tBlockSize = 16u;
1077 uint8x16x2_t target_8x16x2;
1079 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1081 const unsigned int pixels = width * height;
1082 const unsigned int blocks = pixels / tBlockSize;
1083 const unsigned int remaining = pixels % tBlockSize;
1085 for (
unsigned int n = 0u; n < blocks; ++n)
1087 const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1088 const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1089 const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1090 const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1092 const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1093 const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1094 const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1095 const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1097 target_8x16x2.val[0] =
NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1098 target_8x16x2.val[1] =
NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1100 vst2q_u8(target, target_8x16x2);
1102 source0 += tBlockSize;
1103 source1 += tBlockSize;
1105 target += tBlockSize * tChannels;
1108 for (
unsigned int n = 0u; n < remaining; ++n)
1110 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1111 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1113 target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1114 target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1119 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
1120 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
1122 const unsigned int blocks = width / tBlockSize;
1123 const unsigned int remaining = width % tBlockSize;
1125 for (
unsigned int y = 0u; y < height; ++y)
1127 for (
unsigned int n = 0u; n < blocks; ++n)
1129 const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1130 const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1131 const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1132 const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1134 const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1135 const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1136 const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1137 const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1139 target_8x16x2.val[0] =
NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1140 target_8x16x2.val[1] =
NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1142 vst2q_u8(target, target_8x16x2);
1144 source0 += tBlockSize;
1145 source1 += tBlockSize;
1147 target += tBlockSize * tChannels;
1150 for (
unsigned int n = 0u; n < remaining; ++n)
1152 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1153 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1155 target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1156 target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1159 source0 += remaining + sourceFrame0PaddingElements;
1160 source1 += remaining + sourceFrame1PaddingElements;
1161 target += remaining * tChannels + targetFramePaddingElements;
1167 void AdvancedFrameChannels::zipChannelsOnlySourceFactorNEON<float, uint8_t, 3u>(
const float*
const*
const sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const float sourceFactor,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
1169 ocean_assert(sourceFrames !=
nullptr);
1170 ocean_assert(targetFrame !=
nullptr);
1172 ocean_assert(width != 0u && height != 0u);
1174 constexpr
unsigned int tChannels = 3u;
1176 bool allSourceFramesContinuous =
true;
1178 if (sourceFramesPaddingElements !=
nullptr)
1180 for (
unsigned int n = 0u; n < tChannels; ++n)
1182 if (sourceFramesPaddingElements[n] != 0u)
1184 allSourceFramesContinuous =
false;
1190 const float32x4_t sourceFactor_32x4 = vdupq_n_f32(sourceFactor);
1192 const float* source0 = sourceFrames[0];
1193 const float* source1 = sourceFrames[1];
1194 const float* source2 = sourceFrames[2];
1195 uint8_t* target = targetFrame;
1197 constexpr
unsigned int tBlockSize = 16u;
1199 uint8x16x3_t target_8x16x3;
1201 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1203 const unsigned int pixels = width * height;
1204 const unsigned int blocks = pixels / tBlockSize;
1205 const unsigned int remaining = pixels % tBlockSize;
1207 for (
unsigned int n = 0u; n < blocks; ++n)
1209 const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1210 const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1211 const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1212 const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1214 const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1215 const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1216 const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1217 const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1219 const float32x4_t source2_A_f_32x4 = vmulq_f32(vld1q_f32(source2 + 0), sourceFactor_32x4);
1220 const float32x4_t source2_B_f_32x4 = vmulq_f32(vld1q_f32(source2 + 4), sourceFactor_32x4);
1221 const float32x4_t source2_C_f_32x4 = vmulq_f32(vld1q_f32(source2 + 8), sourceFactor_32x4);
1222 const float32x4_t source2_D_f_32x4 = vmulq_f32(vld1q_f32(source2 + 12), sourceFactor_32x4);
1224 target_8x16x3.val[0] =
NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1225 target_8x16x3.val[1] =
NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1226 target_8x16x3.val[2] =
NEON::cast16ElementsNEON(source2_A_f_32x4, source2_B_f_32x4, source2_C_f_32x4, source2_D_f_32x4);
1228 vst3q_u8(target, target_8x16x3);
1230 source0 += tBlockSize;
1231 source1 += tBlockSize;
1232 source2 += tBlockSize;
1234 target += tBlockSize * tChannels;
1237 for (
unsigned int n = 0u; n < remaining; ++n)
1239 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1240 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1241 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
1243 target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1244 target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1245 target[n * tChannels + 2u] = uint8_t(source2[n] * sourceFactor);
1250 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
1251 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
1252 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
1254 const unsigned int blocks = width / tBlockSize;
1255 const unsigned int remaining = width % tBlockSize;
1257 for (
unsigned int y = 0u; y < height; ++y)
1259 for (
unsigned int n = 0u; n < blocks; ++n)
1261 const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1262 const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1263 const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1264 const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1266 const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1267 const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1268 const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1269 const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1271 const float32x4_t source2_A_f_32x4 = vmulq_f32(vld1q_f32(source2 + 0), sourceFactor_32x4);
1272 const float32x4_t source2_B_f_32x4 = vmulq_f32(vld1q_f32(source2 + 4), sourceFactor_32x4);
1273 const float32x4_t source2_C_f_32x4 = vmulq_f32(vld1q_f32(source2 + 8), sourceFactor_32x4);
1274 const float32x4_t source2_D_f_32x4 = vmulq_f32(vld1q_f32(source2 + 12), sourceFactor_32x4);
1276 target_8x16x3.val[0] =
NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1277 target_8x16x3.val[1] =
NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1278 target_8x16x3.val[2] =
NEON::cast16ElementsNEON(source2_A_f_32x4, source2_B_f_32x4, source2_C_f_32x4, source2_D_f_32x4);
1280 vst3q_u8(target, target_8x16x3);
1282 source0 += tBlockSize;
1283 source1 += tBlockSize;
1284 source2 += tBlockSize;
1286 target += tBlockSize * tChannels;
1289 for (
unsigned int n = 0u; n < remaining; ++n)
1291 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1292 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1293 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
1295 target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1296 target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1297 target[n * tChannels + 2u] = uint8_t(source2[n] * sourceFactor);
1300 source0 += remaining + sourceFrame0PaddingElements;
1301 source1 += remaining + sourceFrame1PaddingElements;
1302 source2 += remaining + sourceFrame2PaddingElements;
1303 target += remaining * tChannels + targetFramePaddingElements;
1309 void AdvancedFrameChannels::zipChannelsOnlySourceFactorNEON<float, uint8_t, 4u>(
const float*
const*
const sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const float sourceFactor,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
1311 ocean_assert(sourceFrames !=
nullptr);
1312 ocean_assert(targetFrame !=
nullptr);
1314 ocean_assert(width != 0u && height != 0u);
1316 constexpr
unsigned int tChannels = 4u;
1318 bool allSourceFramesContinuous =
true;
1320 if (sourceFramesPaddingElements !=
nullptr)
1322 for (
unsigned int n = 0u; n < tChannels; ++n)
1324 if (sourceFramesPaddingElements[n] != 0u)
1326 allSourceFramesContinuous =
false;
1332 const float32x4_t sourceFactor_32x4 = vdupq_n_f32(sourceFactor);
1334 const float* source0 = sourceFrames[0];
1335 const float* source1 = sourceFrames[1];
1336 const float* source2 = sourceFrames[2];
1337 const float* source3 = sourceFrames[3];
1338 uint8_t* target = targetFrame;
1340 constexpr
unsigned int tBlockSize = 16u;
1342 uint8x16x4_t target_8x16x4;
1344 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1346 const unsigned int pixels = width * height;
1347 const unsigned int blocks = pixels / tBlockSize;
1348 const unsigned int remaining = pixels % tBlockSize;
1350 for (
unsigned int n = 0u; n < blocks; ++n)
1352 const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1353 const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1354 const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1355 const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1357 const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1358 const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1359 const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1360 const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1362 const float32x4_t source2_A_f_32x4 = vmulq_f32(vld1q_f32(source2 + 0), sourceFactor_32x4);
1363 const float32x4_t source2_B_f_32x4 = vmulq_f32(vld1q_f32(source2 + 4), sourceFactor_32x4);
1364 const float32x4_t source2_C_f_32x4 = vmulq_f32(vld1q_f32(source2 + 8), sourceFactor_32x4);
1365 const float32x4_t source2_D_f_32x4 = vmulq_f32(vld1q_f32(source2 + 12), sourceFactor_32x4);
1367 const float32x4_t source3_A_f_32x4 = vmulq_f32(vld1q_f32(source3 + 0), sourceFactor_32x4);
1368 const float32x4_t source3_B_f_32x4 = vmulq_f32(vld1q_f32(source3 + 4), sourceFactor_32x4);
1369 const float32x4_t source3_C_f_32x4 = vmulq_f32(vld1q_f32(source3 + 8), sourceFactor_32x4);
1370 const float32x4_t source3_D_f_32x4 = vmulq_f32(vld1q_f32(source3 + 12), sourceFactor_32x4);
1372 target_8x16x4.val[0] =
NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1373 target_8x16x4.val[1] =
NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1374 target_8x16x4.val[2] =
NEON::cast16ElementsNEON(source2_A_f_32x4, source2_B_f_32x4, source2_C_f_32x4, source2_D_f_32x4);
1375 target_8x16x4.val[3] =
NEON::cast16ElementsNEON(source3_A_f_32x4, source3_B_f_32x4, source3_C_f_32x4, source3_D_f_32x4);
1377 vst4q_u8(target, target_8x16x4);
1379 source0 += tBlockSize;
1380 source1 += tBlockSize;
1381 source2 += tBlockSize;
1382 source3 += tBlockSize;
1384 target += tBlockSize * tChannels;
1387 for (
unsigned int n = 0u; n < remaining; ++n)
1389 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1390 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1391 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
1392 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
1394 target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1395 target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1396 target[n * tChannels + 2u] = uint8_t(source2[n] * sourceFactor);
1397 target[n * tChannels + 3u] = uint8_t(source3[n] * sourceFactor);
1402 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
1403 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
1404 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
1405 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[3];
1407 const unsigned int blocks = width / tBlockSize;
1408 const unsigned int remaining = width % tBlockSize;
1410 for (
unsigned int y = 0u; y < height; ++y)
1412 for (
unsigned int n = 0u; n < blocks; ++n)
1414 const float32x4_t source0_A_f_32x4 = vmulq_f32(vld1q_f32(source0 + 0), sourceFactor_32x4);
1415 const float32x4_t source0_B_f_32x4 = vmulq_f32(vld1q_f32(source0 + 4), sourceFactor_32x4);
1416 const float32x4_t source0_C_f_32x4 = vmulq_f32(vld1q_f32(source0 + 8), sourceFactor_32x4);
1417 const float32x4_t source0_D_f_32x4 = vmulq_f32(vld1q_f32(source0 + 12), sourceFactor_32x4);
1419 const float32x4_t source1_A_f_32x4 = vmulq_f32(vld1q_f32(source1 + 0), sourceFactor_32x4);
1420 const float32x4_t source1_B_f_32x4 = vmulq_f32(vld1q_f32(source1 + 4), sourceFactor_32x4);
1421 const float32x4_t source1_C_f_32x4 = vmulq_f32(vld1q_f32(source1 + 8), sourceFactor_32x4);
1422 const float32x4_t source1_D_f_32x4 = vmulq_f32(vld1q_f32(source1 + 12), sourceFactor_32x4);
1424 const float32x4_t source2_A_f_32x4 = vmulq_f32(vld1q_f32(source2 + 0), sourceFactor_32x4);
1425 const float32x4_t source2_B_f_32x4 = vmulq_f32(vld1q_f32(source2 + 4), sourceFactor_32x4);
1426 const float32x4_t source2_C_f_32x4 = vmulq_f32(vld1q_f32(source2 + 8), sourceFactor_32x4);
1427 const float32x4_t source2_D_f_32x4 = vmulq_f32(vld1q_f32(source2 + 12), sourceFactor_32x4);
1429 const float32x4_t source3_A_f_32x4 = vmulq_f32(vld1q_f32(source3 + 0), sourceFactor_32x4);
1430 const float32x4_t source3_B_f_32x4 = vmulq_f32(vld1q_f32(source3 + 4), sourceFactor_32x4);
1431 const float32x4_t source3_C_f_32x4 = vmulq_f32(vld1q_f32(source3 + 8), sourceFactor_32x4);
1432 const float32x4_t source3_D_f_32x4 = vmulq_f32(vld1q_f32(source3 + 12), sourceFactor_32x4);
1434 target_8x16x4.val[0] =
NEON::cast16ElementsNEON(source0_A_f_32x4, source0_B_f_32x4, source0_C_f_32x4, source0_D_f_32x4);
1435 target_8x16x4.val[1] =
NEON::cast16ElementsNEON(source1_A_f_32x4, source1_B_f_32x4, source1_C_f_32x4, source1_D_f_32x4);
1436 target_8x16x4.val[2] =
NEON::cast16ElementsNEON(source2_A_f_32x4, source2_B_f_32x4, source2_C_f_32x4, source2_D_f_32x4);
1437 target_8x16x4.val[3] =
NEON::cast16ElementsNEON(source3_A_f_32x4, source3_B_f_32x4, source3_C_f_32x4, source3_D_f_32x4);
1439 vst4q_u8(target, target_8x16x4);
1441 source0 += tBlockSize;
1442 source1 += tBlockSize;
1443 source2 += tBlockSize;
1444 source3 += tBlockSize;
1446 target += tBlockSize * tChannels;
1449 for (
unsigned int n = 0u; n < remaining; ++n)
1451 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
1452 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
1453 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
1454 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
1456 target[n * tChannels + 0u] = uint8_t(source0[n] * sourceFactor);
1457 target[n * tChannels + 1u] = uint8_t(source1[n] * sourceFactor);
1458 target[n * tChannels + 2u] = uint8_t(source2[n] * sourceFactor);
1459 target[n * tChannels + 3u] = uint8_t(source3[n] * sourceFactor);
1462 source0 += remaining + sourceFrame0PaddingElements;
1463 source1 += remaining + sourceFrame1PaddingElements;
1464 source2 += remaining + sourceFrame2PaddingElements;
1465 source3 += remaining + sourceFrame3PaddingElements;
1466 target += remaining * tChannels + targetFramePaddingElements;
This class implements advanced frame channel conversion, transformation and extraction functions.
Definition: AdvancedFrameChannels.h:30
static void zipChannels(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition: AdvancedFrameChannels.h:378
static void zipChannelsRuntime(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition: AdvancedFrameChannels.h:571
static void zipChannelsOnlySourceFactorNEON(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const TSource sourceFactor, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
static void separateTo1ChannelOnlyTargetFactorNEON(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static void separateTo1ChannelRuntime(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition: AdvancedFrameChannels.h:495
static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
Definition of a constant to specify that the number of channels is not known at compile time but only at runtime.
Definition: AdvancedFrameChannels.h:36
static void separateTo1Channel(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const TSource sourceFactor, const TTarget targetFactor, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition: AdvancedFrameChannels.h:254
static OCEAN_FORCE_INLINE uint8x16_t cast16ElementsNEON(const float32x4_t &sourceA_f_32x4, const float32x4_t &sourceB_f_32x4, const float32x4_t &sourceC_f_32x4, const float32x4_t &sourceD_f_32x4)
Casts 16 float elements to 16 uint8_t elements.
Definition: NEON.h:1208
std::vector< Index32 > Indices32
Definition of a vector holding 32 bit index values.
Definition: Base.h:96
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15