8 #ifndef META_OCEAN_CV_FRAME_TRANSPOSER_H
9 #define META_OCEAN_CV_FRAME_TRANSPOSER_H
/**
 * Rotates a frame by 90 degrees; the frame is passed by mutable reference and receives the result.
 * @param frame The frame to rotate, holds the rotated image afterwards
 * @param clockwise True, to rotate clockwise; false, to rotate counter-clockwise
 * @param worker Optional worker object, nullptr by default
 * @return NOTE(review): presumably true on success - confirm against the definition
 */
59 static inline bool rotate90(
Frame& frame,
const bool clockwise,
Worker* worker =
nullptr);
/**
 * Rotates a frame by 180 degrees; the frame is passed by mutable reference and receives the result.
 * @param frame The frame to rotate, holds the rotated image afterwards
 * @param worker Optional worker object, nullptr by default
 * @return NOTE(review): presumably true on success - confirm against the definition
 */
76 static inline bool rotate180(
Frame& frame,
Worker* worker =
nullptr);
/**
 * Rotates a frame by a given angle; the frame is passed by mutable reference and receives the result.
 * @param frame The frame to rotate, holds the rotated image afterwards
 * @param angle The rotation angle in degrees; the pointer-based rotate() overload asserts that the angle is a multiple of +/- 90
 * @param worker Optional worker object, nullptr by default
 * @return NOTE(review): presumably true on success - confirm against the definition
 */
95 static inline bool rotate(
Frame& frame,
const int angle,
Worker* worker =
nullptr);
120 template <
typename T,
unsigned int tChannels>
/**
 * Transposes one block of 8x8 pixels from the source image into the target image.
 * The flip direction is a compile-time parameter (the callers in this file use FD_NONE, FD_LEFT_RIGHT and FD_TOP_BOTTOM).
 * @param sourceBlock Pointer to the first element of the block in the source image
 * @param targetBlock Pointer to the first element of the block in the target image
 * @param sourceStrideElements Number of elements between the starts of two consecutive source rows
 * @param targetStrideElements Number of elements between the starts of two consecutive target rows
 * @tparam tFlipDirection The flip to apply in addition to the transposition
 */
134 template <FlipDirection tFlipDirection>
135 static OCEAN_FORCE_INLINE
void transposeBlock8x8(
const T* sourceBlock, T* targetBlock,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements);
/**
 * Transposes one block of pixels with an explicitly given size.
 * NOTE(review): presumably used for the partial blocks at the right/bottom image border, for which
 * transposeSubset() computes a width/height of at most 8 pixels - confirm.
 * @param sourceBlock Pointer to the first element of the block in the source image
 * @param targetBlock Pointer to the first element of the block in the target image
 * @param blockWidth Width of the block, in pixels
 * @param blockHeight Height of the block, in pixels
 * @param sourceStrideElements Number of elements between the starts of two consecutive source rows
 * @param targetStrideElements Number of elements between the starts of two consecutive target rows
 * @tparam tFlipDirection The flip to apply in addition to the transposition
 */
149 template <FlipDirection tFlipDirection>
150 static OCEAN_FORCE_INLINE
void transposeBlock(
const T* sourceBlock, T* targetBlock,
const unsigned int blockWidth,
const unsigned int blockHeight,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements);
152 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
/**
 * Transposes one block of 4x4 pixels using NEON instructions.
 * This declaration is only available if OCEAN_HARDWARE_NEON_VERSION is defined and at least 10.
 * @param sourceBlock Pointer to the first element of the block in the source image
 * @param targetBlock Pointer to the first element of the block in the target image
 * @param sourceStrideElements Number of elements between the starts of two consecutive source rows
 * @param targetStrideElements Number of elements between the starts of two consecutive target rows
 * @tparam tFlipDirection The flip to apply in addition to the transposition
 */
163 template <FlipDirection tFlipDirection>
164 static OCEAN_FORCE_INLINE
void transposeBlock4x4NEON(
const T* sourceBlock, T* targetBlock,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements);
/**
 * Transposes a frame; the frame is passed by mutable reference and receives the result.
 * @param frame The frame to transpose, holds the transposed image afterwards
 * @param worker Optional worker object, nullptr by default
 * @return NOTE(review): presumably true on success - confirm against the definition
 */
188 static inline bool transpose(
Frame& frame,
Worker* worker =
nullptr);
/**
 * Transposes an image given as raw memory into a second, distinct buffer.
 * @param source The source image, must be valid and must not be identical to `target`
 * @param target The target image receiving the transposed result, must be valid
 * @param sourceWidth Width of the source image in pixels, must not be 0
 * @param sourceHeight Height of the source image in pixels, must not be 0
 * @param sourcePaddingElements Number of padding elements at the end of each source row
 * @param targetPaddingElements Number of padding elements at the end of each target row
 * @param worker Optional worker object, nullptr by default
 * @tparam T The data type of each element
 * @tparam tChannels The number of channels per pixel, must not be 0
 */
202 template <
typename T,
unsigned int tChannels>
203 static void transpose(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Rotates an image given as raw memory by 90 degrees into a second, distinct buffer.
 * @param source The source image, must be valid and must not be identical to `target`
 * @param target The target image receiving the rotated result, must be valid
 * @param sourceWidth Width of the source image in pixels, must not be 0
 * @param sourceHeight Height of the source image in pixels, must not be 0
 * @param clockwise True, to rotate clockwise; false, to rotate counter-clockwise
 * @param sourcePaddingElements Number of padding elements at the end of each source row
 * @param targetPaddingElements Number of padding elements at the end of each target row
 * @param worker Optional worker object, nullptr by default
 * @tparam T The data type of each element
 * @tparam tChannels The number of channels per pixel, must not be 0
 */
218 template <
typename T,
unsigned int tChannels>
219 static void rotate90(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const bool clockwise,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Rotates an image given as raw memory by 180 degrees.
 * @param source The source image, must be valid
 * @param target The target image receiving the rotated result, must be valid
 * @param sourceWidth Width of the source image in pixels, must not be 0
 * @param sourceHeight Height of the source image in pixels, must not be 0
 * @param sourcePaddingElements Number of padding elements at the end of each source row
 * @param targetPaddingElements Number of padding elements at the end of each target row
 * @param worker Optional worker object, nullptr by default
 * @tparam T The data type of each element
 * @tparam tChannels The number of channels per pixel, must not be 0
 */
233 template <
typename T,
unsigned int tChannels>
234 static void rotate180(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Rotates an image given as raw memory by a multiple of 90 degrees.
 * @param source The source image, must be valid
 * @param target The target image receiving the rotated result, must be valid
 * @param sourceWidth Width of the source image in pixels, must not be 0
 * @param sourceHeight Height of the source image in pixels, must not be 0
 * @param angle The rotation angle in degrees, may be negative; the definition asserts a multiple of +/- 90
 * @param sourcePaddingElements Number of padding elements at the end of each source row
 * @param targetPaddingElements Number of padding elements at the end of each target row
 * @param worker Optional worker object, nullptr by default
 * @return NOTE(review): presumably true on success and false for an unsupported angle - confirm against the definition
 * @tparam T The data type of each element
 * @tparam tChannels The number of channels per pixel, must not be 0
 */
250 template <
typename T,
unsigned int tChannels>
251 static bool rotate(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const int angle,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Transposes (and optionally flips) a subset of an image.
 * NOTE(review): the last two parameters are named `firstSourceRow`/`numberSourceRows` in this declaration, but the
 * definition names them `firstBlock8`/`numberBlocks8` and interprets them as a range of 8x8 block indices (the callers
 * pass the range [0, blocks8)); consider aligning the names.
 * @param source The source image, must be valid
 * @param target The target image receiving the result, must be valid
 * @param sourceWidth Width of the source image in pixels, must not be 0
 * @param sourceHeight Height of the source image in pixels, must not be 0
 * @param sourcePaddingElements Number of padding elements at the end of each source row
 * @param targetPaddingElements Number of padding elements at the end of each target row
 * @param firstSourceRow Index of the first 8x8 block to handle (see note above)
 * @param numberSourceRows Number of 8x8 blocks to handle (see note above)
 * @tparam T The data type of each element
 * @tparam tChannels The number of channels per pixel, must not be 0
 * @tparam tFlipDirection The flip to apply in addition to the transposition
 */
270 template <
typename T,
unsigned int tChannels, FlipDirection tFlipDirection>
271 static void transposeSubset(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstSourceRow,
const unsigned int numberSourceRows);
/**
 * Rotates a subset of an image by 90 degrees, the subset is defined by a range of target rows.
 * The single-threaded caller in rotate90() passes the range [0, sourceWidth), i.e. the target image has `sourceWidth` rows.
 * @param source The source image
 * @param target The target image receiving the rotated result
 * @param sourceWidth Width of the source image in pixels
 * @param sourceHeight Height of the source image in pixels
 * @param clockwise True, to rotate clockwise; false, to rotate counter-clockwise
 * @param sourcePaddingElements Number of padding elements at the end of each source row
 * @param targetPaddingElements Number of padding elements at the end of each target row
 * @param firstTargetRow Index of the first target row to handle
 * @param numberTargetRows Number of target rows to handle
 * @tparam TElementType The data type of each element
 * @tparam tChannels The number of channels per pixel
 */
287 template <
typename TElementType,
unsigned int tChannels>
288 static void rotate90Subset(
const TElementType* source, TElementType* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const bool clockwise,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows);
296 if (!
rotate90(frame, tmpFrame, clockwise, worker))
304 frame = std::move(tmpFrame);
322 frame = std::move(tmpFrame);
337 if (!
rotate(frame, tmpFrame, angle, worker))
345 frame = std::move(tmpFrame);
364 frame = std::move(tmpFrame);
/**
 * Transposes an image from `source` into `target`.
 * The image is partitioned into blocks of 8x8 pixels (partial border blocks are counted as well) and all blocks are
 * forwarded to transposeSubset() without flipping (FD_NONE).
 * NOTE(review): `MappedType` is presumably a type alias derived from `T` - confirm where it is declared.
 * @param source The source image, must be valid and must differ from `target`
 * @param target The target image, must be valid
 * @param sourceWidth Width of the source image in pixels, must not be 0
 * @param sourceHeight Height of the source image in pixels, must not be 0
 * @param sourcePaddingElements Number of padding elements at the end of each source row
 * @param targetPaddingElements Number of padding elements at the end of each target row
 * @param worker Optional worker object; only used if at least 800 blocks need to be handled
 */
368 template <
typename T,
unsigned int tChannels>
369 void FrameTransposer::transpose(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
371 static_assert(tChannels != 0u,
"Invalid channel number!");
// out-of-place operation only: both buffers must be valid and must not be the same pointer
373 ocean_assert(source && target);
374 ocean_assert(source != target);
375 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
// number of 8x8 blocks per direction, rounded up so that partial border blocks are included
377 const unsigned int xBlocks8 = (sourceWidth + 7u) / 8u;
378 const unsigned int yBlocks8 = (sourceHeight + 7u) / 8u;
380 const unsigned int blocks8 = xBlocks8 * yBlocks8;
// the worker is used for 800 or more blocks only
// NOTE(review): the two trailing 0u arguments of createStatic() are presumably placeholders for the block range
// [0, blocks8) which the worker splits among its threads - confirm against Worker::executeFunction()
384 if (worker && blocks8 >= 800u)
386 worker->
executeFunction(
Worker::Function::createStatic(&transposeSubset<MappedType, tChannels, FD_NONE>, (
const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, blocks8);
// single-call path: all blocks [0, blocks8) are handled at once
390 transposeSubset<MappedType, tChannels, FD_NONE>((
const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements, 0u, blocks8);
/**
 * Rotates an image by 90 degrees from `source` into `target`.
 * Two implementations are visible: if OCEAN_HARDWARE_SSE_VERSION is defined and greater than 0, the rotation is done by
 * rotate90Subset() over a range of target rows; the remaining code expresses the rotation as a block-wise transposition
 * combined with a flip (transposeSubset() with FD_LEFT_RIGHT or FD_TOP_BOTTOM).
 * NOTE(review): `MappedType` is presumably a type alias derived from `T` - confirm where it is declared.
 * @param source The source image, must be valid and must differ from `target`
 * @param target The target image, must be valid
 * @param sourceWidth Width of the source image in pixels, must not be 0
 * @param sourceHeight Height of the source image in pixels, must not be 0
 * @param clockwise True, to rotate clockwise; false, to rotate counter-clockwise
 * @param sourcePaddingElements Number of padding elements at the end of each source row
 * @param targetPaddingElements Number of padding elements at the end of each target row
 * @param worker Optional worker object
 */
394 template <
typename T,
unsigned int tChannels>
395 void FrameTransposer::rotate90(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const bool clockwise,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
397 static_assert(tChannels != 0u,
"Invalid channel number!");
// out-of-place operation only: both buffers must be valid and must not be the same pointer
399 ocean_assert(source && target);
400 ocean_assert(source != target);
401 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
405 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION > 0
// row-based rotation: the range of target rows is [0, sourceWidth) as the target image has `sourceWidth` rows
// NOTE(review): the meaning of the trailing arguments 7u, 8u, 20u is defined by Worker::executeFunction() - confirm
412 worker->
executeFunction(
Worker::Function::createStatic(rotate90Subset<MappedType, tChannels>, (
const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, clockwise, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, sourceWidth, 7u, 8u, 20u);
// single-call variant handling all target rows at once
416 rotate90Subset<MappedType, tChannels>((
const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, clockwise, sourcePaddingElements, targetPaddingElements, 0u, sourceWidth);
// block-based rotation: number of 8x8 blocks per direction, rounded up to include partial border blocks
423 const unsigned int xBlocks8 = (sourceWidth + 7u) / 8u;
424 const unsigned int yBlocks8 = (sourceHeight + 7u) / 8u;
426 const unsigned int blocks8 = xBlocks8 * yBlocks8;
// the worker is used for 800 or more blocks only
428 if (worker && blocks8 >= 800u)
// NOTE(review): presumably the clockwise case - a transposition followed by a left-right flip
432 worker->
executeFunction(
Worker::Function::createStatic(&transposeSubset<MappedType, tChannels, FD_LEFT_RIGHT>, (
const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, blocks8);
// NOTE(review): presumably the counter-clockwise case - a transposition followed by a top-bottom flip
436 worker->
executeFunction(
Worker::Function::createStatic(&transposeSubset<MappedType, tChannels, FD_TOP_BOTTOM>, (
const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, blocks8);
// single-call variants of the two cases above, handling all blocks [0, blocks8) at once
443 transposeSubset<MappedType, tChannels, FD_LEFT_RIGHT>((
const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements, 0u, blocks8);
447 transposeSubset<MappedType, tChannels, FD_TOP_BOTTOM>((
const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements, 0u, blocks8);
/**
 * Rotates an image by 180 degrees from `source` into `target`.
 * NOTE(review): in contrast to transpose() and rotate90(), this function does not assert `source != target`;
 * presumably identical buffers are permitted here - confirm.
 * @param source The source image, must be valid
 * @param target The target image, must be valid
 * @param sourceWidth Width of the source image in pixels, must not be 0
 * @param sourceHeight Height of the source image in pixels, must not be 0
 * @param sourcePaddingElements Number of padding elements at the end of each source row
 * @param targetPaddingElements Number of padding elements at the end of each target row
 * @param worker Optional worker object
 */
454 template <
typename T,
unsigned int tChannels>
455 void FrameTransposer::rotate180(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
457 static_assert(tChannels != 0u,
"Invalid channel number!");
// input validation (debug assertions)
459 ocean_assert(source !=
nullptr);
460 ocean_assert(target !=
nullptr);
462 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
/**
 * Rotates an image by a multiple of 90 degrees from `source` into `target`.
 * The angle is normalized to the range [0, 360) and then dispatched to a plain copy, rotate90() or rotate180().
 * @param source The source image, must be valid
 * @param target The target image, must be valid
 * @param sourceWidth Width of the source image in pixels, must not be 0
 * @param sourceHeight Height of the source image in pixels, must not be 0
 * @param angle The rotation angle in degrees, may be negative, must be a multiple of +/- 90
 * @param sourcePaddingElements Number of padding elements at the end of each source row
 * @param targetPaddingElements Number of padding elements at the end of each target row
 * @param worker Optional worker object, forwarded to rotate90() and rotate180()
 * @return NOTE(review): presumably true on success and false for an invalid angle - confirm
 */
467 template <
typename T,
unsigned int tChannels>
468 bool FrameTransposer::rotate(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const int angle,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
470 static_assert(tChannels != 0u,
"Invalid channel number!");
// input validation (debug assertions)
472 ocean_assert(source !=
nullptr);
473 ocean_assert(target !=
nullptr);
475 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
// NOTE(review): presumably reached only if the angle is not a multiple of 90 - confirm the guarding condition
479 ocean_assert(
false &&
"Angle must be multiple of +/- 90");
// normalize the angle: the remainder of a negative angle is negative, so 360 is added to map it to [0, 360)
483 int adjustedAngle = angle % 360;
485 if (adjustedAngle < 0)
487 adjustedAngle = 360 + adjustedAngle;
490 ocean_assert(adjustedAngle == 0 || adjustedAngle == 90 || adjustedAngle == 180 || adjustedAngle == 270);
492 switch (adjustedAngle)
// NOTE(review): presumably the 0 degree case - the entire image is copied without any rotation
495 CV::FrameChannels::subFrame<T>(source, target, sourceWidth, sourceHeight, sourceWidth, sourceHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
// NOTE(review): presumably the 90 degree case - clockwise rotation
499 rotate90<T, tChannels>(source, target, sourceWidth, sourceHeight,
true , sourcePaddingElements, targetPaddingElements, worker);
// NOTE(review): presumably the 180 degree case
503 rotate180<T, tChannels>(source, target, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements, worker);
// NOTE(review): presumably the 270 degree case - counter-clockwise rotation by 90 degrees
507 rotate90<T, tChannels>(source, target, sourceWidth, sourceHeight,
false , sourcePaddingElements, targetPaddingElements, worker);
// any other normalized angle is a programming error
514 ocean_assert(
false &&
"This should never happen!");
/**
 * Transposes (and optionally flips) a range of 8x8 pixel blocks of an image.
 * Blocks are indexed in row-major order over the grid of 8x8 blocks of the source image; for each block the top-left
 * element in the source and in the target is determined, depending on the flip direction.
 * @param source The source image, must be valid
 * @param target The target image, must be valid
 * @param sourceWidth Width of the source image in pixels, must not be 0
 * @param sourceHeight Height of the source image in pixels, must not be 0
 * @param sourcePaddingElements Number of padding elements at the end of each source row
 * @param targetPaddingElements Number of padding elements at the end of each target row
 * @param firstBlock8 Index of the first block to handle
 * @param numberBlocks8 Number of blocks to handle; firstBlock8 + numberBlocks8 must not exceed the total block count
 */
518 template <
typename T,
unsigned int tChannels, FrameTransposer::FlipDirection tFlipDirection>
519 void FrameTransposer::transposeSubset(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstBlock8,
const unsigned int numberBlocks8)
521 static_assert(
sizeof(T) != 0,
"Invalid data type!");
522 static_assert(tChannels != 0u,
"Invalid channel number!");
524 ocean_assert(source && target);
525 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
// row strides in elements; the target stride is based on `sourceHeight` as the transposed image is `sourceHeight` pixels wide
527 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
528 const unsigned int targetStrideElements = sourceHeight * tChannels + targetPaddingElements;
// number of 8x8 blocks per direction, rounded up so that partial border blocks are included
530 const unsigned int xBlocks8 = (sourceWidth + 7u) / 8u;
531 const unsigned int yBlocks8 = (sourceHeight + 7u) / 8u;
532 ocean_assert(firstBlock8 + numberBlocks8 <= xBlocks8 * yBlocks8);
// index of the last (partial) block column/row; (unsigned int)(-1) if the dimension is a multiple of 8
534 const unsigned int xSmallBlockIndex = xBlocks8 * 8u == sourceWidth ? (
unsigned int)(-1) : (xBlocks8 - 1u);
535 const unsigned int ySmallBlockIndex = yBlocks8 * 8u == sourceHeight ? (
unsigned int)(-1) : (yBlocks8 - 1u);
537 for (
unsigned int block8 = firstBlock8; block8 < firstBlock8 + numberBlocks8; ++block8)
// row-major decomposition of the linear block index into block row and block column
539 const unsigned int yBlock8 = block8 / xBlocks8;
540 const unsigned int xBlock8 = block8 % xBlocks8;
542 const T* sourceBlockTopLeft =
nullptr;
543 T* targetBlockTopLeft =
nullptr;
545 switch (tFlipDirection)
// NOTE(review): presumably the no-flip case - block (xBlock8, yBlock8) is written to block (yBlock8, xBlock8)
551 sourceBlockTopLeft = source + sourceStrideElements * yBlock8 * 8u + xBlock8 * 8u * tChannels;
552 targetBlockTopLeft = target + targetStrideElements * xBlock8 * 8u + yBlock8 * 8u * tChannels;
// NOTE(review): presumably the left-right flip - the target column is mirrored
// the column is clamped to 0 for the partial bottom block of the source
561 const unsigned int xTarget = (
unsigned int)(std::max(0,
int(sourceHeight) - int((yBlock8 + 1u) * 8u)));
563 sourceBlockTopLeft = source + sourceStrideElements * yBlock8 * 8u + xBlock8 * 8u * tChannels;
564 targetBlockTopLeft = target + targetStrideElements * xBlock8 * 8u + xTarget * tChannels;
// NOTE(review): presumably the top-bottom flip - the target row is mirrored
// the row is clamped to 0 for the partial right block of the source
573 const unsigned int yTarget = (
unsigned int)(std::max(0,
int(sourceWidth) - int((xBlock8 + 1u) * 8u)));
575 sourceBlockTopLeft = source + sourceStrideElements * yBlock8 * 8u + xBlock8 * 8u * tChannels;
576 targetBlockTopLeft = target + targetStrideElements * yTarget + yBlock8 * 8u * tChannels;
582 ocean_assert(
false &&
"Invalid flip direction!");
585 ocean_assert(sourceBlockTopLeft !=
nullptr);
586 ocean_assert(targetBlockTopLeft !=
nullptr);
// full 8x8 blocks are neither in the partial block column nor in the partial block row
// NOTE(review): presumably dispatched to transposeBlock8x8() - confirm
588 if (xBlock8 != xSmallBlockIndex && yBlock8 != ySmallBlockIndex)
// actual size of a partial block, at most 8 pixels in each direction
// NOTE(review): presumably forwarded to transposeBlock() - confirm
594 const unsigned int blockWidth = min(sourceWidth - xBlock8 * 8u, 8u);
595 const unsigned int blockHeight = min(sourceHeight - yBlock8 * 8u, 8u);
602 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// SSE implementation transposing an 8x8 block in which every row consists of 8 bytes.
// NOTE(review): presumably the specialization of transposeBlock8x8() for 8-bit elements with one channel - confirm.
605 template <FrameTransposer::FlipDirection tFlipDirection>
608 ocean_assert(sourceBlock && targetBlock);
609 ocean_assert(sourceStrideElements >= 8u && targetStrideElements >= 8u);
// rows 0 to 3, 8 bytes each: rows 0 and 2 share one register (low/high half), rows 1 and 3 share the other
617 __m128 line02_f_32x4 = _mm_setzero_ps();
618 __m128 line13_f_32x4 = _mm_setzero_ps();
620 line02_f_32x4 = _mm_loadl_pi(line02_f_32x4, (
const __m64*)(sourceBlock + sourceStrideElements * 0u));
621 line13_f_32x4 = _mm_loadl_pi(line13_f_32x4, (
const __m64*)(sourceBlock + sourceStrideElements * 1u));
622 line02_f_32x4 = _mm_loadh_pi(line02_f_32x4, (
const __m64*)(sourceBlock + sourceStrideElements * 2u));
623 line13_f_32x4 = _mm_loadh_pi(line13_f_32x4, (
const __m64*)(sourceBlock + sourceStrideElements * 3u));
// byte-wise interleaving: line01 alternates the bytes of rows 0 and 1, line23 alternates the bytes of rows 2 and 3
625 const __m128i line01_u_8x16 = _mm_unpacklo_epi8(_mm_castps_si128(line02_f_32x4), _mm_castps_si128(line13_f_32x4));
626 const __m128i line23_u_8x16 = _mm_unpackhi_epi8(_mm_castps_si128(line02_f_32x4), _mm_castps_si128(line13_f_32x4));
// 16-bit interleaving: A holds source columns 0 to 3 of rows 0 to 3 (4 bytes per column), B holds columns 4 to 7
628 const __m128i intermediateA_03_u_8x16 = _mm_unpacklo_epi16(line01_u_8x16, line23_u_8x16);
629 const __m128i intermediateB_03_u_8x16 = _mm_unpackhi_epi16(line01_u_8x16, line23_u_8x16);
// the same steps for rows 4 to 7
631 __m128 line46_f_32x4 = _mm_setzero_ps();
632 __m128 line57_f_32x4 = _mm_setzero_ps();
633 line46_f_32x4 = _mm_loadl_pi(line46_f_32x4, (
const __m64*)(sourceBlock + sourceStrideElements * 4u));
634 line57_f_32x4 = _mm_loadl_pi(line57_f_32x4, (
const __m64*)(sourceBlock + sourceStrideElements * 5u));
635 line46_f_32x4 = _mm_loadh_pi(line46_f_32x4, (
const __m64*)(sourceBlock + sourceStrideElements * 6u));
636 line57_f_32x4 = _mm_loadh_pi(line57_f_32x4, (
const __m64*)(sourceBlock + sourceStrideElements * 7u));
638 const __m128i line45_u_8x16 = _mm_unpacklo_epi8(_mm_castps_si128(line46_f_32x4), _mm_castps_si128(line57_f_32x4));
639 const __m128i line67_u_8x16 = _mm_unpackhi_epi8(_mm_castps_si128(line46_f_32x4), _mm_castps_si128(line57_f_32x4));
641 const __m128i intermediateA_47_u_8x16 = _mm_unpacklo_epi16(line45_u_8x16, line67_u_8x16);
642 const __m128i intermediateB_47_u_8x16 = _mm_unpackhi_epi16(line45_u_8x16, line67_u_8x16);
// 32-bit interleaving: each 64-bit half now is one complete target row; transposedXY holds target row X in the low half and target row Y in the high half
644 __m128i transposed01 = _mm_unpacklo_epi32(intermediateA_03_u_8x16, intermediateA_47_u_8x16);
645 __m128i transposed23 = _mm_unpackhi_epi32(intermediateA_03_u_8x16, intermediateA_47_u_8x16);
646 __m128i transposed45 = _mm_unpacklo_epi32(intermediateB_03_u_8x16, intermediateB_47_u_8x16);
647 __m128i transposed67 = _mm_unpackhi_epi32(intermediateB_03_u_8x16, intermediateB_47_u_8x16);
649 switch (tFlipDirection)
// shuffle mask reversing the byte order within each 64-bit half, i.e. mirroring each 8-byte target row
// NOTE(review): presumably applied for the left-right flip only - confirm
653 const __m128i reverseSuffleMask_u_16x8 = _mm_set_epi64x(0x08090A0B0C0D0E0Fll, 0x0001020304050607ll);
655 transposed01 = _mm_shuffle_epi8(transposed01, reverseSuffleMask_u_16x8);
656 transposed23 = _mm_shuffle_epi8(transposed23, reverseSuffleMask_u_16x8);
657 transposed45 = _mm_shuffle_epi8(transposed45, reverseSuffleMask_u_16x8);
658 transposed67 = _mm_shuffle_epi8(transposed67, reverseSuffleMask_u_16x8);
// storing the target rows 0 to 7 in natural order
666 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 0u), _mm_castsi128_ps(transposed01));
667 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 1u), _mm_castsi128_ps(transposed01));
668 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 2u), _mm_castsi128_ps(transposed23));
669 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 3u), _mm_castsi128_ps(transposed23));
670 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 4u), _mm_castsi128_ps(transposed45));
671 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 5u), _mm_castsi128_ps(transposed45));
672 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 6u), _mm_castsi128_ps(transposed67));
673 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 7u), _mm_castsi128_ps(transposed67));
// storing the target rows in reverse order (transposed row 7 first)
// NOTE(review): presumably the top-bottom flip
680 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 0u), _mm_castsi128_ps(transposed67));
681 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 1u), _mm_castsi128_ps(transposed67));
682 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 2u), _mm_castsi128_ps(transposed45));
683 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 3u), _mm_castsi128_ps(transposed45));
684 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 4u), _mm_castsi128_ps(transposed23));
685 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 5u), _mm_castsi128_ps(transposed23));
686 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 6u), _mm_castsi128_ps(transposed01));
687 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 7u), _mm_castsi128_ps(transposed01));
// any other flip direction is a programming error
693 ocean_assert(
false &&
"Invalid flip direction!");
// SSE implementation transposing an 8x8 block in which every row consists of 16 bytes, handled as eight 16-bit units.
// NOTE(review): presumably the specialization of transposeBlock8x8() for pixels with 2 bytes - confirm.
698 template <FrameTransposer::FlipDirection tFlipDirection>
701 ocean_assert(sourceBlock && targetBlock);
702 ocean_assert(sourceStrideElements >= 8u && targetStrideElements >= 8u);
// loading the eight source rows, 16 bytes each (unaligned loads)
710 const __m128i line0_u_8x16 = _mm_loadu_si128((
const __m128i*)(sourceBlock + sourceStrideElements * 0u));
711 const __m128i line1_u_8x16 = _mm_loadu_si128((
const __m128i*)(sourceBlock + sourceStrideElements * 1u));
712 const __m128i line2_u_8x16 = _mm_loadu_si128((
const __m128i*)(sourceBlock + sourceStrideElements * 2u));
713 const __m128i line3_u_8x16 = _mm_loadu_si128((
const __m128i*)(sourceBlock + sourceStrideElements * 3u));
714 const __m128i line4_u_8x16 = _mm_loadu_si128((
const __m128i*)(sourceBlock + sourceStrideElements * 4u));
715 const __m128i line5_u_8x16 = _mm_loadu_si128((
const __m128i*)(sourceBlock + sourceStrideElements * 5u));
716 const __m128i line6_u_8x16 = _mm_loadu_si128((
const __m128i*)(sourceBlock + sourceStrideElements * 6u));
717 const __m128i line7_u_8x16 = _mm_loadu_si128((
const __m128i*)(sourceBlock + sourceStrideElements * 7u));
// 16-bit interleaving of neighboring rows: A covers the source columns 0 to 3, B covers the source columns 4 to 7
719 const __m128i line01_A_u_8x16 = _mm_unpacklo_epi16(line0_u_8x16, line1_u_8x16);
720 const __m128i line01_B_u_8x16 = _mm_unpackhi_epi16(line0_u_8x16, line1_u_8x16);
721 const __m128i line23_A_u_8x16 = _mm_unpacklo_epi16(line2_u_8x16, line3_u_8x16);
722 const __m128i line23_B_u_8x16 = _mm_unpackhi_epi16(line2_u_8x16, line3_u_8x16);
723 const __m128i line45_A_u_8x16 = _mm_unpacklo_epi16(line4_u_8x16, line5_u_8x16);
724 const __m128i line45_B_u_8x16 = _mm_unpackhi_epi16(line4_u_8x16, line5_u_8x16);
725 const __m128i line67_A_u_8x16 = _mm_unpacklo_epi16(line6_u_8x16, line7_u_8x16);
726 const __m128i line67_B_u_8x16 = _mm_unpackhi_epi16(line6_u_8x16, line7_u_8x16);
// 32-bit interleaving: each register holds two source columns for four rows (rows 0 to 3 or rows 4 to 7)
728 const __m128i intermediateAA_03_u_8x16 = _mm_unpacklo_epi32(line01_A_u_8x16, line23_A_u_8x16);
729 const __m128i intermediateAB_03_u_8x16 = _mm_unpackhi_epi32(line01_A_u_8x16, line23_A_u_8x16);
730 const __m128i intermediateBA_03_u_8x16 = _mm_unpacklo_epi32(line01_B_u_8x16, line23_B_u_8x16);
731 const __m128i intermediateBB_03_u_8x16 = _mm_unpackhi_epi32(line01_B_u_8x16, line23_B_u_8x16);
732 const __m128i intermediateAA_47_u_8x16 = _mm_unpacklo_epi32(line45_A_u_8x16, line67_A_u_8x16);
733 const __m128i intermediateAB_47_u_8x16 = _mm_unpackhi_epi32(line45_A_u_8x16, line67_A_u_8x16);
734 const __m128i intermediateBA_47_u_8x16 = _mm_unpacklo_epi32(line45_B_u_8x16, line67_B_u_8x16);
735 const __m128i intermediateBB_47_u_8x16 = _mm_unpackhi_epi32(line45_B_u_8x16, line67_B_u_8x16);
// 64-bit interleaving: transposedK is the complete target row K (source column K of all eight rows)
737 __m128i transposed0 = _mm_unpacklo_epi64(intermediateAA_03_u_8x16, intermediateAA_47_u_8x16);
738 __m128i transposed1 = _mm_unpackhi_epi64(intermediateAA_03_u_8x16, intermediateAA_47_u_8x16);
739 __m128i transposed2 = _mm_unpacklo_epi64(intermediateAB_03_u_8x16, intermediateAB_47_u_8x16);
740 __m128i transposed3 = _mm_unpackhi_epi64(intermediateAB_03_u_8x16, intermediateAB_47_u_8x16);
741 __m128i transposed4 = _mm_unpacklo_epi64(intermediateBA_03_u_8x16, intermediateBA_47_u_8x16);
742 __m128i transposed5 = _mm_unpackhi_epi64(intermediateBA_03_u_8x16, intermediateBA_47_u_8x16);
743 __m128i transposed6 = _mm_unpacklo_epi64(intermediateBB_03_u_8x16, intermediateBB_47_u_8x16);
744 __m128i transposed7 = _mm_unpackhi_epi64(intermediateBB_03_u_8x16, intermediateBB_47_u_8x16);
746 switch (tFlipDirection)
// shuffle mask reversing the order of the eight 16-bit units of a row while keeping the two bytes of each unit in order
// NOTE(review): presumably applied for the left-right flip only - confirm
750 const __m128i reverseSuffleMask_u_16x8 = _mm_set_epi64x(0x0100030205040706ll, 0x09080B0A0D0C0F0Ell);
752 transposed0 = _mm_shuffle_epi8(transposed0, reverseSuffleMask_u_16x8);
753 transposed1 = _mm_shuffle_epi8(transposed1, reverseSuffleMask_u_16x8);
754 transposed2 = _mm_shuffle_epi8(transposed2, reverseSuffleMask_u_16x8);
755 transposed3 = _mm_shuffle_epi8(transposed3, reverseSuffleMask_u_16x8);
756 transposed4 = _mm_shuffle_epi8(transposed4, reverseSuffleMask_u_16x8);
757 transposed5 = _mm_shuffle_epi8(transposed5, reverseSuffleMask_u_16x8);
758 transposed6 = _mm_shuffle_epi8(transposed6, reverseSuffleMask_u_16x8);
759 transposed7 = _mm_shuffle_epi8(transposed7, reverseSuffleMask_u_16x8);
// storing the target rows 0 to 7 in natural order
767 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 0u), transposed0);
768 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 1u), transposed1);
769 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 2u), transposed2);
770 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 3u), transposed3);
771 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 4u), transposed4);
772 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 5u), transposed5);
773 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 6u), transposed6);
774 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 7u), transposed7);
// storing the target rows in reverse order (transposed row 7 first)
// NOTE(review): presumably the top-bottom flip
781 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 0u), transposed7);
782 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 1u), transposed6);
783 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 2u), transposed5);
784 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 3u), transposed4);
785 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 4u), transposed3);
786 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 5u), transposed2);
787 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 6u), transposed1);
788 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 7u), transposed0);
// any other flip direction is a programming error
794 ocean_assert(
false &&
"Invalid flip direction!");
800 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON implementation transposing a 4x4 block in which every row consists of 16 bytes, handled as four 32-bit units.
// NOTE(review): presumably a specialization of transposeBlock4x4NEON() - confirm for which element type and channel number.
803 template <FrameTransposer::FlipDirection tFlipDirection>
806 ocean_assert(sourceBlock && targetBlock);
// NOTE(review): every row load/store below accesses 16 bytes while this assertion only requires a stride of 4 * 3 = 12 elements - confirm the intended bound
807 ocean_assert(sourceStrideElements >= 4u * 3u && targetStrideElements >= 4u * 3u);
// rows 0 and 1; with rows a and b, vtrnq_u32 provides val[0] = a0 b0 a2 b2 and val[1] = a1 b1 a3 b3
811 const uint32x4_t line0_u_32x4 = vreinterpretq_u32_u8(vld1q_u8(sourceBlock + sourceStrideElements * 0u));
812 const uint32x4_t line1_u_32x4 = vreinterpretq_u32_u8(vld1q_u8(sourceBlock + sourceStrideElements * 1u));
816 const uint32x4x2_t line01_u_32x4x2 = vtrnq_u32(line0_u_32x4, line1_u_32x4);
// rows 2 and 3, handled in the same way
818 const uint32x4_t line2_u_32x4 = vreinterpretq_u32_u8(vld1q_u8(sourceBlock + sourceStrideElements * 2u));
819 const uint32x4_t line3_u_32x4 = vreinterpretq_u32_u8(vld1q_u8(sourceBlock + sourceStrideElements * 3u));
823 const uint32x4x2_t line23_u_32x4x2 = vtrnq_u32(line2_u_32x4, line3_u_32x4);
// combining the 64-bit halves: resultK is the complete target row K (source column K of all four rows)
829 const uint32x4_t result0_u_32x4 = vcombine_u32(vget_low_u32(line01_u_32x4x2.val[0]), vget_low_u32(line23_u_32x4x2.val[0]));
830 const uint32x4_t result1_u_32x4 = vcombine_u32(vget_low_u32(line01_u_32x4x2.val[1]), vget_low_u32(line23_u_32x4x2.val[1]));
831 const uint32x4_t result2_u_32x4 = vcombine_u32(vget_high_u32(line01_u_32x4x2.val[0]), vget_high_u32(line23_u_32x4x2.val[0]));
832 const uint32x4_t result3_u_32x4 = vcombine_u32(vget_high_u32(line01_u_32x4x2.val[1]), vget_high_u32(line23_u_32x4x2.val[1]));
834 switch (tFlipDirection)
// storing the target rows 0 to 3 in natural order
// NOTE(review): presumably the no-flip case
838 vst1q_u8(targetBlock + targetStrideElements * 0u, vreinterpretq_u8_u32(result0_u_32x4));
839 vst1q_u8(targetBlock + targetStrideElements * 1u, vreinterpretq_u8_u32(result1_u_32x4));
840 vst1q_u8(targetBlock + targetStrideElements * 2u, vreinterpretq_u8_u32(result2_u_32x4));
841 vst1q_u8(targetBlock + targetStrideElements * 3u, vreinterpretq_u8_u32(result3_u_32x4));
// reversing the four 32-bit units of each row: vrev64q_u32 swaps the two units within each 64-bit half, then the halves are swapped
// NOTE(review): presumably the left-right flip
848 const uint32x4_t halfReverseResult0_u_32x4 = vrev64q_u32(result0_u_32x4);
849 const uint8x16_t reverseResult0_u_32x4 = vreinterpretq_u8_u32(vcombine_u32(vget_high_u32(halfReverseResult0_u_32x4), vget_low_u32(halfReverseResult0_u_32x4)));
850 vst1q_u8(targetBlock + targetStrideElements * 0u, reverseResult0_u_32x4);
852 const uint32x4_t halfReverseResult1_u_32x4 = vrev64q_u32(result1_u_32x4);
853 const uint8x16_t reverseResult1_u_32x4 = vreinterpretq_u8_u32(vcombine_u32(vget_high_u32(halfReverseResult1_u_32x4), vget_low_u32(halfReverseResult1_u_32x4)));
854 vst1q_u8(targetBlock + targetStrideElements * 1u, reverseResult1_u_32x4);
856 const uint32x4_t halfReverseResult2_u_32x4 = vrev64q_u32(result2_u_32x4);
857 const uint8x16_t reverseResult2_u_32x4 = vreinterpretq_u8_u32(vcombine_u32(vget_high_u32(halfReverseResult2_u_32x4), vget_low_u32(halfReverseResult2_u_32x4)));
858 vst1q_u8(targetBlock + targetStrideElements * 2u, reverseResult2_u_32x4);
860 const uint32x4_t halfReverseResult3_u_32x4 = vrev64q_u32(result3_u_32x4);
861 const uint8x16_t reverseResult3_u_32x4 = vreinterpretq_u8_u32(vcombine_u32(vget_high_u32(halfReverseResult3_u_32x4), vget_low_u32(halfReverseResult3_u_32x4)));
862 vst1q_u8(targetBlock + targetStrideElements * 3u, reverseResult3_u_32x4);
// storing the target rows in reverse order (transposed row 3 first)
// NOTE(review): presumably the top-bottom flip
869 vst1q_u8(targetBlock + targetStrideElements * 0u, vreinterpretq_u8_u32(result3_u_32x4));
870 vst1q_u8(targetBlock + targetStrideElements * 1u, vreinterpretq_u8_u32(result2_u_32x4));
871 vst1q_u8(targetBlock + targetStrideElements * 2u, vreinterpretq_u8_u32(result1_u_32x4));
872 vst1q_u8(targetBlock + targetStrideElements * 3u, vreinterpretq_u8_u32(result0_u_32x4));
// any other flip direction is a programming error
878 ocean_assert(
false &&
"Invalid flip direction!");
// NEON implementation transposing an 8x8 block in which every row consists of 8 bytes.
// The transposition is composed of three stages of vtrn operations with increasing unit size (8 bit, 16 bit, 32 bit).
// NOTE(review): presumably the specialization of transposeBlock8x8() for 8-bit elements with one channel - confirm.
883 template <FrameTransposer::FlipDirection tFlipDirection>
886 ocean_assert(sourceBlock && targetBlock);
887 ocean_assert(sourceStrideElements >= 8u && targetStrideElements >= 8u);
// stage 1 (8-bit units) for rows 0/1 and rows 2/3
891 const uint8x8_t line0_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 0u);
892 const uint8x8_t line1_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 1u);
896 const uint8x8x2_t line01_u_8x8x2 = vtrn_u8(line0_u_8x8, line1_u_8x8);
898 const uint8x8_t line2_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 2u);
899 const uint8x8_t line3_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 3u);
903 const uint8x8x2_t line23_u_8x8x2 = vtrn_u8(line2_u_8x8, line3_u_8x8);
// stage 2 (16-bit units) for rows 0 to 3
907 const uint16x4x2_t line02_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_u_8x8x2.val[0]), vreinterpret_u16_u8(line23_u_8x8x2.val[0]));
911 const uint16x4x2_t line13_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_u_8x8x2.val[1]), vreinterpret_u16_u8(line23_u_8x8x2.val[1]));
// stage 1 and stage 2 for rows 4 to 7
913 const uint8x8_t line4_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 4u);
914 const uint8x8_t line5_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 5u);
916 const uint8x8x2_t line45_u_8x8x2 = vtrn_u8(line4_u_8x8, line5_u_8x8);
918 const uint8x8_t line6_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 6u);
919 const uint8x8_t line7_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 7u);
921 const uint8x8x2_t line67_u_8x8x2 = vtrn_u8(line6_u_8x8, line7_u_8x8);
923 const uint16x4x2_t line46_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_u_8x8x2.val[0]), vreinterpret_u16_u8(line67_u_8x8x2.val[0]));
924 const uint16x4x2_t line57_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_u_8x8x2.val[1]), vreinterpret_u16_u8(line67_u_8x8x2.val[1]));
// stage 3 (32-bit units): lineXY holds the target row X in val[0] and the target row Y in val[1]
926 const uint32x2x2_t line04_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_u_16x4x2.val[0]), vreinterpret_u32_u16(line46_u_16x4x2.val[0]));
927 const uint32x2x2_t line26_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_u_16x4x2.val[1]), vreinterpret_u32_u16(line46_u_16x4x2.val[1]));
929 const uint32x2x2_t line15_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_u_16x4x2.val[0]), vreinterpret_u32_u16(line57_u_16x4x2.val[0]));
930 const uint32x2x2_t line37_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_u_16x4x2.val[1]), vreinterpret_u32_u16(line57_u_16x4x2.val[1]));
932 switch (tFlipDirection)
// storing the target rows 0 to 7 in natural order
// NOTE(review): presumably the no-flip case
936 vst1_u8(targetBlock + targetStrideElements * 0u, vreinterpret_u8_u32(line04_u_32x2x2.val[0]));
937 vst1_u8(targetBlock + targetStrideElements * 1u, vreinterpret_u8_u32(line15_u_32x2x2.val[0]));
938 vst1_u8(targetBlock + targetStrideElements * 2u, vreinterpret_u8_u32(line26_u_32x2x2.val[0]));
939 vst1_u8(targetBlock + targetStrideElements * 3u, vreinterpret_u8_u32(line37_u_32x2x2.val[0]));
940 vst1_u8(targetBlock + targetStrideElements * 4u, vreinterpret_u8_u32(line04_u_32x2x2.val[1]));
941 vst1_u8(targetBlock + targetStrideElements * 5u, vreinterpret_u8_u32(line15_u_32x2x2.val[1]));
942 vst1_u8(targetBlock + targetStrideElements * 6u, vreinterpret_u8_u32(line26_u_32x2x2.val[1]));
943 vst1_u8(targetBlock + targetStrideElements * 7u, vreinterpret_u8_u32(line37_u_32x2x2.val[1]));
// storing the target rows in natural order with the 8 bytes of each row reversed by vrev64_u8
// NOTE(review): presumably the left-right flip
950 vst1_u8(targetBlock + targetStrideElements * 0u, vrev64_u8(vreinterpret_u8_u32(line04_u_32x2x2.val[0])));
951 vst1_u8(targetBlock + targetStrideElements * 1u, vrev64_u8(vreinterpret_u8_u32(line15_u_32x2x2.val[0])));
952 vst1_u8(targetBlock + targetStrideElements * 2u, vrev64_u8(vreinterpret_u8_u32(line26_u_32x2x2.val[0])));
953 vst1_u8(targetBlock + targetStrideElements * 3u, vrev64_u8(vreinterpret_u8_u32(line37_u_32x2x2.val[0])));
954 vst1_u8(targetBlock + targetStrideElements * 4u, vrev64_u8(vreinterpret_u8_u32(line04_u_32x2x2.val[1])));
955 vst1_u8(targetBlock + targetStrideElements * 5u, vrev64_u8(vreinterpret_u8_u32(line15_u_32x2x2.val[1])));
956 vst1_u8(targetBlock + targetStrideElements * 6u, vrev64_u8(vreinterpret_u8_u32(line26_u_32x2x2.val[1])));
957 vst1_u8(targetBlock + targetStrideElements * 7u, vrev64_u8(vreinterpret_u8_u32(line37_u_32x2x2.val[1])));
// storing the target rows in reverse order (transposed row 7 first)
// NOTE(review): presumably the top-bottom flip
964 vst1_u8(targetBlock + targetStrideElements * 0u, vreinterpret_u8_u32(line37_u_32x2x2.val[1]));
965 vst1_u8(targetBlock + targetStrideElements * 1u, vreinterpret_u8_u32(line26_u_32x2x2.val[1]));
966 vst1_u8(targetBlock + targetStrideElements * 2u, vreinterpret_u8_u32(line15_u_32x2x2.val[1]));
967 vst1_u8(targetBlock + targetStrideElements * 3u, vreinterpret_u8_u32(line04_u_32x2x2.val[1]));
968 vst1_u8(targetBlock + targetStrideElements * 4u, vreinterpret_u8_u32(line37_u_32x2x2.val[0]));
969 vst1_u8(targetBlock + targetStrideElements * 5u, vreinterpret_u8_u32(line26_u_32x2x2.val[0]));
970 vst1_u8(targetBlock + targetStrideElements * 6u, vreinterpret_u8_u32(line15_u_32x2x2.val[0]));
971 vst1_u8(targetBlock + targetStrideElements * 7u, vreinterpret_u8_u32(line04_u_32x2x2.val[0]));
// any other flip direction is a programming error
977 ocean_assert(
false &&
"Invalid flip direction!");
982 template <FrameTransposer::FlipDirection tFlipDirection>
985 ocean_assert(sourceBlock && targetBlock);
986 ocean_assert(sourceStrideElements >= 8u * 2u && targetStrideElements >= 8u * 2u);
991 const uint16x8_t line0_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 0u));
992 const uint16x8_t line1_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 1u));
996 const uint16x8x2_t line01_u_16x8x2 = vtrnq_u16(line0_u_16x8, line1_u_16x8);
998 const uint16x8_t line2_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 2u));
999 const uint16x8_t line3_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 3u));
1003 const uint16x8x2_t line23_u_16x8x2 = vtrnq_u16(line2_u_16x8, line3_u_16x8);
1007 const uint32x4x2_t line02_u_32x4x2 = vtrnq_u32(vreinterpretq_u32_u16(line01_u_16x8x2.val[0]), vreinterpretq_u32_u16(line23_u_16x8x2.val[0]));
1011 const uint32x4x2_t line13_u_32x4x2 = vtrnq_u32(vreinterpretq_u32_u16(line01_u_16x8x2.val[1]), vreinterpretq_u32_u16(line23_u_16x8x2.val[1]));
1013 const uint16x8_t line4_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 4u));
1014 const uint16x8_t line5_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 5u));
1016 const uint16x8x2_t line45_u_16x8x2 = vtrnq_u16(line4_u_16x8, line5_u_16x8);
1018 const uint16x8_t line6_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 6u));
1019 const uint16x8_t line7_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 7u));
1021 const uint16x8x2_t line67_u_16x8x2 = vtrnq_u16(line6_u_16x8, line7_u_16x8);
1023 const uint32x4x2_t line46_u_32x4x2 = vtrnq_u32(vreinterpretq_u32_u16(line45_u_16x8x2.val[0]), vreinterpretq_u32_u16(line67_u_16x8x2.val[0]));
1024 const uint32x4x2_t line57_u_32x4x2 = vtrnq_u32(vreinterpretq_u32_u16(line45_u_16x8x2.val[1]), vreinterpretq_u32_u16(line67_u_16x8x2.val[1]));
1026 const uint32x4_t result0_u_32x4 = vcombine_u32(vget_low_u32(line02_u_32x4x2.val[0]), vget_low_u32(line46_u_32x4x2.val[0]));
1027 const uint32x4_t result1_u_32x4 = vcombine_u32(vget_low_u32(line13_u_32x4x2.val[0]), vget_low_u32(line57_u_32x4x2.val[0]));
1029 const uint32x4_t result2_u_32x4 = vcombine_u32(vget_low_u32(line02_u_32x4x2.val[1]), vget_low_u32(line46_u_32x4x2.val[1]));
1030 const uint32x4_t result3_u_32x4 = vcombine_u32(vget_low_u32(line13_u_32x4x2.val[1]), vget_low_u32(line57_u_32x4x2.val[1]));
1032 const uint32x4_t result4_u_32x4 = vcombine_u32(vget_high_u32(line02_u_32x4x2.val[0]), vget_high_u32(line46_u_32x4x2.val[0]));
1033 const uint32x4_t result5_u_32x4 = vcombine_u32(vget_high_u32(line13_u_32x4x2.val[0]), vget_high_u32(line57_u_32x4x2.val[0]));
1035 const uint32x4_t result6_u_32x4 = vcombine_u32(vget_high_u32(line02_u_32x4x2.val[1]), vget_high_u32(line46_u_32x4x2.val[1]));
1036 const uint32x4_t result7_u_32x4 = vcombine_u32(vget_high_u32(line13_u_32x4x2.val[1]), vget_high_u32(line57_u_32x4x2.val[1]));
1038 switch (tFlipDirection)
1042 vst1q_u8(targetBlock + targetStrideElements * 0u, vreinterpretq_u8_u32(result0_u_32x4));
1043 vst1q_u8(targetBlock + targetStrideElements * 1u, vreinterpretq_u8_u32(result1_u_32x4));
1044 vst1q_u8(targetBlock + targetStrideElements * 2u, vreinterpretq_u8_u32(result2_u_32x4));
1045 vst1q_u8(targetBlock + targetStrideElements * 3u, vreinterpretq_u8_u32(result3_u_32x4));
1046 vst1q_u8(targetBlock + targetStrideElements * 4u, vreinterpretq_u8_u32(result4_u_32x4));
1047 vst1q_u8(targetBlock + targetStrideElements * 5u, vreinterpretq_u8_u32(result5_u_32x4));
1048 vst1q_u8(targetBlock + targetStrideElements * 6u, vreinterpretq_u8_u32(result6_u_32x4));
1049 vst1q_u8(targetBlock + targetStrideElements * 7u, vreinterpretq_u8_u32(result7_u_32x4));
1056 const uint8x16_t targetHalfReverse0_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result0_u_32x4)));
1057 vst1q_u8(targetBlock + targetStrideElements * 0u, vcombine_u8(vget_high_u8(targetHalfReverse0_u_8x16), vget_low_u8(targetHalfReverse0_u_8x16)));
1059 const uint8x16_t targetHalfReverse1_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result1_u_32x4)));
1060 vst1q_u8(targetBlock + targetStrideElements * 1u, vcombine_u8(vget_high_u8(targetHalfReverse1_u_8x16), vget_low_u8(targetHalfReverse1_u_8x16)));
1062 const uint8x16_t targetHalfReverse2_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result2_u_32x4)));
1063 vst1q_u8(targetBlock + targetStrideElements * 2u, vcombine_u8(vget_high_u8(targetHalfReverse2_u_8x16), vget_low_u8(targetHalfReverse2_u_8x16)));
1065 const uint8x16_t targetHalfReverse3_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result3_u_32x4)));
1066 vst1q_u8(targetBlock + targetStrideElements * 3u, vcombine_u8(vget_high_u8(targetHalfReverse3_u_8x16), vget_low_u8(targetHalfReverse3_u_8x16)));
1068 const uint8x16_t targetHalfReverse4_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result4_u_32x4)));
1069 vst1q_u8(targetBlock + targetStrideElements * 4u, vcombine_u8(vget_high_u8(targetHalfReverse4_u_8x16), vget_low_u8(targetHalfReverse4_u_8x16)));
1071 const uint8x16_t targetHalfReverse5_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result5_u_32x4)));
1072 vst1q_u8(targetBlock + targetStrideElements * 5u, vcombine_u8(vget_high_u8(targetHalfReverse5_u_8x16), vget_low_u8(targetHalfReverse5_u_8x16)));
1074 const uint8x16_t targetHalfReverse6_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result6_u_32x4)));
1075 vst1q_u8(targetBlock + targetStrideElements * 6u, vcombine_u8(vget_high_u8(targetHalfReverse6_u_8x16), vget_low_u8(targetHalfReverse6_u_8x16)));
1077 const uint8x16_t targetHalfReverse7_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result7_u_32x4)));
1078 vst1q_u8(targetBlock + targetStrideElements * 7u, vcombine_u8(vget_high_u8(targetHalfReverse7_u_8x16), vget_low_u8(targetHalfReverse7_u_8x16)));
1085 vst1q_u8(targetBlock + targetStrideElements * 0u, vreinterpretq_u8_u32(result7_u_32x4));
1086 vst1q_u8(targetBlock + targetStrideElements * 1u, vreinterpretq_u8_u32(result6_u_32x4));
1087 vst1q_u8(targetBlock + targetStrideElements * 2u, vreinterpretq_u8_u32(result5_u_32x4));
1088 vst1q_u8(targetBlock + targetStrideElements * 3u, vreinterpretq_u8_u32(result4_u_32x4));
1089 vst1q_u8(targetBlock + targetStrideElements * 4u, vreinterpretq_u8_u32(result3_u_32x4));
1090 vst1q_u8(targetBlock + targetStrideElements * 5u, vreinterpretq_u8_u32(result2_u_32x4));
1091 vst1q_u8(targetBlock + targetStrideElements * 6u, vreinterpretq_u8_u32(result1_u_32x4));
1092 vst1q_u8(targetBlock + targetStrideElements * 7u, vreinterpretq_u8_u32(result0_u_32x4));
1098 ocean_assert(
false &&
"Invalid flip direction!");
// 8x8 pixel block transpose for interleaved 3-channel, 8-bit data using NEON intrinsics.
// NOTE(review): the function signature line, the braces and the 'case' labels of the switch
// are not part of this excerpt; the comments below describe only what the visible statements do.
//
// Strategy:
//  1. vld3_u8 loads 8 pixels (24 bytes) of one source row and de-interleaves them into three
//     8-byte vectors, one per channel.
//  2. Each channel is transposed independently as an 8x8 byte matrix with the classic three-stage
//     network: vtrn_u8 (row pairs), vtrn_u16 (row quads), vtrn_u32 (all eight rows).
//  3. vst3_u8 re-interleaves the three channel vectors while writing one target row.
// The variable names encode which rows a value combines, e.g. line04_* ends up holding
// transposed row 0 in val[0] and transposed row 4 in val[1].
1103 template <FrameTransposer::FlipDirection tFlipDirection>
1106 ocean_assert(sourceBlock && targetBlock);
// every row must provide at least 8 pixels with 3 elements each
1107 ocean_assert(sourceStrideElements >= 8u * 3u && targetStrideElements >= 8u * 3u);
// --- rows 0..3: load (de-interleaved), then 8-bit and 16-bit transpose stages ---
1112 const uint8x8x3_t line0_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 0u);
1113 const uint8x8x3_t line1_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 1u);
// stage 1: transpose the 2x2 byte blocks formed by rows 0 and 1, separately for each channel
1117 const uint8x8x2_t line01_channel0_u_8x8x2 = vtrn_u8(line0_u_8x8x3.val[0], line1_u_8x8x3.val[0]);
1118 const uint8x8x2_t line01_channel1_u_8x8x2 = vtrn_u8(line0_u_8x8x3.val[1], line1_u_8x8x3.val[1]);
1119 const uint8x8x2_t line01_channel2_u_8x8x2 = vtrn_u8(line0_u_8x8x3.val[2], line1_u_8x8x3.val[2]);
1121 const uint8x8x3_t line2_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 2u);
1122 const uint8x8x3_t line3_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 3u);
// stage 1 for rows 2 and 3
1126 const uint8x8x2_t line23_channel0_u_8x8x2 = vtrn_u8(line2_u_8x8x3.val[0], line3_u_8x8x3.val[0]);
1127 const uint8x8x2_t line23_channel1_u_8x8x2 = vtrn_u8(line2_u_8x8x3.val[1], line3_u_8x8x3.val[1]);
1128 const uint8x8x2_t line23_channel2_u_8x8x2 = vtrn_u8(line2_u_8x8x3.val[2], line3_u_8x8x3.val[2]);
// stage 2: combine the even outputs of the row pairs 0/1 and 2/3 at 16-bit granularity
1132 const uint16x4x2_t line02_channel0_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_channel0_u_8x8x2.val[0]), vreinterpret_u16_u8(line23_channel0_u_8x8x2.val[0]));
1133 const uint16x4x2_t line02_channel1_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_channel1_u_8x8x2.val[0]), vreinterpret_u16_u8(line23_channel1_u_8x8x2.val[0]));
1134 const uint16x4x2_t line02_channel2_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_channel2_u_8x8x2.val[0]), vreinterpret_u16_u8(line23_channel2_u_8x8x2.val[0]));
// stage 2: the same for the odd outputs of the row pairs 0/1 and 2/3
1138 const uint16x4x2_t line13_channel0_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_channel0_u_8x8x2.val[1]), vreinterpret_u16_u8(line23_channel0_u_8x8x2.val[1]));
1139 const uint16x4x2_t line13_channel1_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_channel1_u_8x8x2.val[1]), vreinterpret_u16_u8(line23_channel1_u_8x8x2.val[1]));
1140 const uint16x4x2_t line13_channel2_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_channel2_u_8x8x2.val[1]), vreinterpret_u16_u8(line23_channel2_u_8x8x2.val[1]));
// --- rows 4..7: identical load / stage 1 / stage 2 sequence ---
1142 const uint8x8x3_t line4_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 4u);
1143 const uint8x8x3_t line5_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 5u);
1145 const uint8x8x2_t line45_channel0_u_8x8x2 = vtrn_u8(line4_u_8x8x3.val[0], line5_u_8x8x3.val[0]);
1146 const uint8x8x2_t line45_channel1_u_8x8x2 = vtrn_u8(line4_u_8x8x3.val[1], line5_u_8x8x3.val[1]);
1147 const uint8x8x2_t line45_channel2_u_8x8x2 = vtrn_u8(line4_u_8x8x3.val[2], line5_u_8x8x3.val[2]);
1149 const uint8x8x3_t line6_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 6u);
1150 const uint8x8x3_t line7_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 7u);
1152 const uint8x8x2_t line67_channel0_u_8x8x2 = vtrn_u8(line6_u_8x8x3.val[0], line7_u_8x8x3.val[0]);
1153 const uint8x8x2_t line67_channel1_u_8x8x2 = vtrn_u8(line6_u_8x8x3.val[1], line7_u_8x8x3.val[1]);
1154 const uint8x8x2_t line67_channel2_u_8x8x2 = vtrn_u8(line6_u_8x8x3.val[2], line7_u_8x8x3.val[2]);
1156 const uint16x4x2_t line46_channel0_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_channel0_u_8x8x2.val[0]), vreinterpret_u16_u8(line67_channel0_u_8x8x2.val[0]));
1157 const uint16x4x2_t line46_channel1_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_channel1_u_8x8x2.val[0]), vreinterpret_u16_u8(line67_channel1_u_8x8x2.val[0]));
1158 const uint16x4x2_t line46_channel2_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_channel2_u_8x8x2.val[0]), vreinterpret_u16_u8(line67_channel2_u_8x8x2.val[0]));
1160 const uint16x4x2_t line57_channel0_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_channel0_u_8x8x2.val[1]), vreinterpret_u16_u8(line67_channel0_u_8x8x2.val[1]));
1161 const uint16x4x2_t line57_channel1_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_channel1_u_8x8x2.val[1]), vreinterpret_u16_u8(line67_channel1_u_8x8x2.val[1]));
1162 const uint16x4x2_t line57_channel2_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_channel2_u_8x8x2.val[1]), vreinterpret_u16_u8(line67_channel2_u_8x8x2.val[1]));
// --- stage 3: merge the upper (rows 0..3) and lower (rows 4..7) halves at 32-bit granularity ---
// afterwards val[0]/val[1] of line04, line15, line26, line37 are complete transposed rows
// (0/4, 1/5, 2/6, 3/7 respectively), as used by the stores below
1164 const uint32x2x2_t line04_channel0_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_channel0_u_16x4x2.val[0]), vreinterpret_u32_u16(line46_channel0_u_16x4x2.val[0]));
1165 const uint32x2x2_t line04_channel1_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_channel1_u_16x4x2.val[0]), vreinterpret_u32_u16(line46_channel1_u_16x4x2.val[0]));
1166 const uint32x2x2_t line04_channel2_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_channel2_u_16x4x2.val[0]), vreinterpret_u32_u16(line46_channel2_u_16x4x2.val[0]));
1168 const uint32x2x2_t line26_channel0_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_channel0_u_16x4x2.val[1]), vreinterpret_u32_u16(line46_channel0_u_16x4x2.val[1]));
1169 const uint32x2x2_t line26_channel1_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_channel1_u_16x4x2.val[1]), vreinterpret_u32_u16(line46_channel1_u_16x4x2.val[1]));
1170 const uint32x2x2_t line26_channel2_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_channel2_u_16x4x2.val[1]), vreinterpret_u32_u16(line46_channel2_u_16x4x2.val[1]));
1172 const uint32x2x2_t line15_channel0_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_channel0_u_16x4x2.val[0]), vreinterpret_u32_u16(line57_channel0_u_16x4x2.val[0]));
1173 const uint32x2x2_t line15_channel1_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_channel1_u_16x4x2.val[0]), vreinterpret_u32_u16(line57_channel1_u_16x4x2.val[0]));
1174 const uint32x2x2_t line15_channel2_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_channel2_u_16x4x2.val[0]), vreinterpret_u32_u16(line57_channel2_u_16x4x2.val[0]));
1176 const uint32x2x2_t line37_channel0_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_channel0_u_16x4x2.val[1]), vreinterpret_u32_u16(line57_channel0_u_16x4x2.val[1]));
1177 const uint32x2x2_t line37_channel1_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_channel1_u_16x4x2.val[1]), vreinterpret_u32_u16(line57_channel1_u_16x4x2.val[1]));
1178 const uint32x2x2_t line37_channel2_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_channel2_u_16x4x2.val[1]), vreinterpret_u32_u16(line57_channel2_u_16x4x2.val[1]));
// tFlipDirection is a template parameter, so the switch can be resolved at compile time
1180 switch (tFlipDirection)
// --- first case (label not visible; presumably FD_NONE): plain transpose ---
// transposed rows 0..7 are written unchanged to target rows 0..7
1184 uint8x8x3_t result0_u_8x8x3;
1185 result0_u_8x8x3.val[0] = vreinterpret_u8_u32(line04_channel0_u_32x2x2.val[0]);
1186 result0_u_8x8x3.val[1] = vreinterpret_u8_u32(line04_channel1_u_32x2x2.val[0]);
1187 result0_u_8x8x3.val[2] = vreinterpret_u8_u32(line04_channel2_u_32x2x2.val[0]);
1188 vst3_u8(targetBlock + targetStrideElements * 0u, result0_u_8x8x3);
1190 uint8x8x3_t result1_u_8x8x3;
1191 result1_u_8x8x3.val[0] = vreinterpret_u8_u32(line15_channel0_u_32x2x2.val[0]);
1192 result1_u_8x8x3.val[1] = vreinterpret_u8_u32(line15_channel1_u_32x2x2.val[0]);
1193 result1_u_8x8x3.val[2] = vreinterpret_u8_u32(line15_channel2_u_32x2x2.val[0]);
1194 vst3_u8(targetBlock + targetStrideElements * 1u, result1_u_8x8x3);
1196 uint8x8x3_t result2_u_8x8x3;
1197 result2_u_8x8x3.val[0] = vreinterpret_u8_u32(line26_channel0_u_32x2x2.val[0]);
1198 result2_u_8x8x3.val[1] = vreinterpret_u8_u32(line26_channel1_u_32x2x2.val[0]);
1199 result2_u_8x8x3.val[2] = vreinterpret_u8_u32(line26_channel2_u_32x2x2.val[0]);
1200 vst3_u8(targetBlock + targetStrideElements * 2u, result2_u_8x8x3);
1202 uint8x8x3_t result3_u_8x8x3;
1203 result3_u_8x8x3.val[0] = vreinterpret_u8_u32(line37_channel0_u_32x2x2.val[0]);
1204 result3_u_8x8x3.val[1] = vreinterpret_u8_u32(line37_channel1_u_32x2x2.val[0]);
1205 result3_u_8x8x3.val[2] = vreinterpret_u8_u32(line37_channel2_u_32x2x2.val[0]);
1206 vst3_u8(targetBlock + targetStrideElements * 3u, result3_u_8x8x3);
1208 uint8x8x3_t result4_u_8x8x3;
1209 result4_u_8x8x3.val[0] = vreinterpret_u8_u32(line04_channel0_u_32x2x2.val[1]);
1210 result4_u_8x8x3.val[1] = vreinterpret_u8_u32(line04_channel1_u_32x2x2.val[1]);
1211 result4_u_8x8x3.val[2] = vreinterpret_u8_u32(line04_channel2_u_32x2x2.val[1]);
1212 vst3_u8(targetBlock + targetStrideElements * 4u, result4_u_8x8x3);
1214 uint8x8x3_t result5_u_8x8x3;
1215 result5_u_8x8x3.val[0] = vreinterpret_u8_u32(line15_channel0_u_32x2x2.val[1]);
1216 result5_u_8x8x3.val[1] = vreinterpret_u8_u32(line15_channel1_u_32x2x2.val[1]);
1217 result5_u_8x8x3.val[2] = vreinterpret_u8_u32(line15_channel2_u_32x2x2.val[1]);
1218 vst3_u8(targetBlock + targetStrideElements * 5u, result5_u_8x8x3);
1220 uint8x8x3_t result6_u_8x8x3;
1221 result6_u_8x8x3.val[0] = vreinterpret_u8_u32(line26_channel0_u_32x2x2.val[1]);
1222 result6_u_8x8x3.val[1] = vreinterpret_u8_u32(line26_channel1_u_32x2x2.val[1]);
1223 result6_u_8x8x3.val[2] = vreinterpret_u8_u32(line26_channel2_u_32x2x2.val[1]);
1224 vst3_u8(targetBlock + targetStrideElements * 6u, result6_u_8x8x3);
1226 uint8x8x3_t result7_u_8x8x3;
1227 result7_u_8x8x3.val[0] = vreinterpret_u8_u32(line37_channel0_u_32x2x2.val[1]);
1228 result7_u_8x8x3.val[1] = vreinterpret_u8_u32(line37_channel1_u_32x2x2.val[1]);
1229 result7_u_8x8x3.val[2] = vreinterpret_u8_u32(line37_channel2_u_32x2x2.val[1]);
1230 vst3_u8(targetBlock + targetStrideElements * 7u, result7_u_8x8x3);
// --- second case (label not visible; presumably FD_LEFT_RIGHT): transpose + horizontal mirror ---
// vrev64_u8 reverses the 8 bytes of each channel vector, i.e. the pixel order within every
// target row; the row order itself stays 0..7
1237 uint8x8x3_t result0_u_8x8x3;
1238 result0_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line04_channel0_u_32x2x2.val[0]));
1239 result0_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line04_channel1_u_32x2x2.val[0]));
1240 result0_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line04_channel2_u_32x2x2.val[0]));
1241 vst3_u8(targetBlock + targetStrideElements * 0u, result0_u_8x8x3);
1243 uint8x8x3_t result1_u_8x8x3;
1244 result1_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line15_channel0_u_32x2x2.val[0]));
1245 result1_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line15_channel1_u_32x2x2.val[0]));
1246 result1_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line15_channel2_u_32x2x2.val[0]));
1247 vst3_u8(targetBlock + targetStrideElements * 1u, result1_u_8x8x3);
1249 uint8x8x3_t result2_u_8x8x3;
1250 result2_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line26_channel0_u_32x2x2.val[0]));
1251 result2_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line26_channel1_u_32x2x2.val[0]));
1252 result2_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line26_channel2_u_32x2x2.val[0]));
1253 vst3_u8(targetBlock + targetStrideElements * 2u, result2_u_8x8x3);
1255 uint8x8x3_t result3_u_8x8x3;
1256 result3_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line37_channel0_u_32x2x2.val[0]));
1257 result3_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line37_channel1_u_32x2x2.val[0]));
1258 result3_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line37_channel2_u_32x2x2.val[0]));
1259 vst3_u8(targetBlock + targetStrideElements * 3u, result3_u_8x8x3);
1261 uint8x8x3_t result4_u_8x8x3;
1262 result4_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line04_channel0_u_32x2x2.val[1]));
1263 result4_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line04_channel1_u_32x2x2.val[1]));
1264 result4_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line04_channel2_u_32x2x2.val[1]));
1265 vst3_u8(targetBlock + targetStrideElements * 4u, result4_u_8x8x3);
1267 uint8x8x3_t result5_u_8x8x3;
1268 result5_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line15_channel0_u_32x2x2.val[1]));
1269 result5_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line15_channel1_u_32x2x2.val[1]));
1270 result5_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line15_channel2_u_32x2x2.val[1]));
1271 vst3_u8(targetBlock + targetStrideElements * 5u, result5_u_8x8x3);
1273 uint8x8x3_t result6_u_8x8x3;
1274 result6_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line26_channel0_u_32x2x2.val[1]));
1275 result6_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line26_channel1_u_32x2x2.val[1]));
1276 result6_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line26_channel2_u_32x2x2.val[1]));
1277 vst3_u8(targetBlock + targetStrideElements * 6u, result6_u_8x8x3);
1279 uint8x8x3_t result7_u_8x8x3;
1280 result7_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line37_channel0_u_32x2x2.val[1]));
1281 result7_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line37_channel1_u_32x2x2.val[1]));
1282 result7_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line37_channel2_u_32x2x2.val[1]));
1283 vst3_u8(targetBlock + targetStrideElements * 7u, result7_u_8x8x3);
// --- third case (label not visible; presumably FD_TOP_BOTTOM): transpose + vertical flip ---
// row content is unchanged, but transposed row 7 goes to target row 0, row 6 to row 1, ...
1290 uint8x8x3_t result7_u_8x8x3;
1291 result7_u_8x8x3.val[0] = vreinterpret_u8_u32(line37_channel0_u_32x2x2.val[1]);
1292 result7_u_8x8x3.val[1] = vreinterpret_u8_u32(line37_channel1_u_32x2x2.val[1]);
1293 result7_u_8x8x3.val[2] = vreinterpret_u8_u32(line37_channel2_u_32x2x2.val[1]);
1294 vst3_u8(targetBlock + targetStrideElements * 0u, result7_u_8x8x3);
1296 uint8x8x3_t result6_u_8x8x3;
1297 result6_u_8x8x3.val[0] = vreinterpret_u8_u32(line26_channel0_u_32x2x2.val[1]);
1298 result6_u_8x8x3.val[1] = vreinterpret_u8_u32(line26_channel1_u_32x2x2.val[1]);
1299 result6_u_8x8x3.val[2] = vreinterpret_u8_u32(line26_channel2_u_32x2x2.val[1]);
1300 vst3_u8(targetBlock + targetStrideElements * 1u, result6_u_8x8x3);
1302 uint8x8x3_t result5_u_8x8x3;
1303 result5_u_8x8x3.val[0] = vreinterpret_u8_u32(line15_channel0_u_32x2x2.val[1]);
1304 result5_u_8x8x3.val[1] = vreinterpret_u8_u32(line15_channel1_u_32x2x2.val[1]);
1305 result5_u_8x8x3.val[2] = vreinterpret_u8_u32(line15_channel2_u_32x2x2.val[1]);
1306 vst3_u8(targetBlock + targetStrideElements * 2u, result5_u_8x8x3);
1308 uint8x8x3_t result4_u_8x8x3;
1309 result4_u_8x8x3.val[0] = vreinterpret_u8_u32(line04_channel0_u_32x2x2.val[1]);
1310 result4_u_8x8x3.val[1] = vreinterpret_u8_u32(line04_channel1_u_32x2x2.val[1]);
1311 result4_u_8x8x3.val[2] = vreinterpret_u8_u32(line04_channel2_u_32x2x2.val[1]);
1312 vst3_u8(targetBlock + targetStrideElements * 3u, result4_u_8x8x3);
1314 uint8x8x3_t result3_u_8x8x3;
1315 result3_u_8x8x3.val[0] = vreinterpret_u8_u32(line37_channel0_u_32x2x2.val[0]);
1316 result3_u_8x8x3.val[1] = vreinterpret_u8_u32(line37_channel1_u_32x2x2.val[0]);
1317 result3_u_8x8x3.val[2] = vreinterpret_u8_u32(line37_channel2_u_32x2x2.val[0]);
1318 vst3_u8(targetBlock + targetStrideElements * 4u, result3_u_8x8x3);
1320 uint8x8x3_t result2_u_8x8x3;
1321 result2_u_8x8x3.val[0] = vreinterpret_u8_u32(line26_channel0_u_32x2x2.val[0]);
1322 result2_u_8x8x3.val[1] = vreinterpret_u8_u32(line26_channel1_u_32x2x2.val[0]);
1323 result2_u_8x8x3.val[2] = vreinterpret_u8_u32(line26_channel2_u_32x2x2.val[0]);
1324 vst3_u8(targetBlock + targetStrideElements * 5u, result2_u_8x8x3);
1326 uint8x8x3_t result1_u_8x8x3;
1327 result1_u_8x8x3.val[0] = vreinterpret_u8_u32(line15_channel0_u_32x2x2.val[0]);
1328 result1_u_8x8x3.val[1] = vreinterpret_u8_u32(line15_channel1_u_32x2x2.val[0]);
1329 result1_u_8x8x3.val[2] = vreinterpret_u8_u32(line15_channel2_u_32x2x2.val[0]);
1330 vst3_u8(targetBlock + targetStrideElements * 6u, result1_u_8x8x3);
1332 uint8x8x3_t result0_u_8x8x3;
1333 result0_u_8x8x3.val[0] = vreinterpret_u8_u32(line04_channel0_u_32x2x2.val[0]);
1334 result0_u_8x8x3.val[1] = vreinterpret_u8_u32(line04_channel1_u_32x2x2.val[0]);
1335 result0_u_8x8x3.val[2] = vreinterpret_u8_u32(line04_channel2_u_32x2x2.val[0]);
1336 vst3_u8(targetBlock + targetStrideElements * 7u, result0_u_8x8x3);
// fallback for an unsupported flip direction (presumably the default branch): debug-only failure
1342 ocean_assert(
false &&
"Invalid flip direction!");
// 8x8 pixel block transpose for 4-channel data, composed of four transposeBlock4x4NEON calls
// (one per 4x4 quadrant). NOTE(review): the function signature line, the braces and the 'case'
// labels are not part of this excerpt.
//
// Quadrant addressing used below:
//   + 16                       -> 4 pixels to the right (presumably 4 pixels * 4 elements per pixel)
//   + 4 * <stride>             -> 4 rows down
// The flip direction is forwarded to the 4x4 kernel, which handles the flip inside each quadrant;
// this function only decides where each transposed quadrant is placed in the target block.
1347 template <FrameTransposer::FlipDirection tFlipDirection>
1350 ocean_assert(sourceBlock && targetBlock);
// every row must provide at least 8 pixels with 4 elements each
1351 ocean_assert(sourceStrideElements >= 8u * 4u && targetStrideElements >= 8u * 4u);
1355 switch (tFlipDirection)
// first case (label not visible; presumably FD_NONE): plain transpose,
// the off-diagonal quadrants swap places (top-right <-> bottom-left)
1359 transposeBlock4x4NEON<tFlipDirection>(sourceBlock, targetBlock, sourceStrideElements, targetStrideElements);
1360 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 16, targetBlock + 4 * targetStrideElements, sourceStrideElements, targetStrideElements);
1361 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 4 * sourceStrideElements, targetBlock + 16, sourceStrideElements, targetStrideElements);
1362 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 4 * sourceStrideElements + 16, targetBlock + 4 * targetStrideElements + 16, sourceStrideElements, targetStrideElements);
// second case (presumably FD_LEFT_RIGHT): compared to the plain transpose, the left and right
// target quadrant columns are exchanged
1369 transposeBlock4x4NEON<tFlipDirection>(sourceBlock, targetBlock + 16, sourceStrideElements, targetStrideElements);
1370 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 16, targetBlock + 4 * targetStrideElements + 16, sourceStrideElements, targetStrideElements);
1371 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 4 * sourceStrideElements, targetBlock, sourceStrideElements, targetStrideElements);
1372 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 4 * sourceStrideElements + 16, targetBlock + 4 * targetStrideElements, sourceStrideElements, targetStrideElements);
// third case (presumably FD_TOP_BOTTOM): compared to the plain transpose, the upper and lower
// target quadrant rows are exchanged
1379 transposeBlock4x4NEON<tFlipDirection>(sourceBlock, targetBlock + 4 * targetStrideElements, sourceStrideElements, targetStrideElements);
1380 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 16, targetBlock, sourceStrideElements, targetStrideElements);
1381 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 4 * sourceStrideElements, targetBlock + 4 * targetStrideElements + 16, sourceStrideElements, targetStrideElements);
1382 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 4 * sourceStrideElements + 16, targetBlock + 16, sourceStrideElements, targetStrideElements);
// fallback for an unsupported flip direction (presumably the default branch): debug-only failure
1388 ocean_assert(
false &&
"Invalid flip direction!");
// Generic (non-SIMD) transpose of an 8x8 pixel block, optionally combined with a flip.
// NOTE(review): the function signature line, the braces, the 'case' labels and the definition of
// PixelType are not part of this excerpt. PixelType presumably covers one whole pixel
// (tChannels elements of T), as the target pointer advances by tChannels per pixel - TODO confirm.
//
// sourceBlock / targetBlock point to the top-left element of the respective block, the strides are
// given in elements. In every variant the outer loop walks the source rows (y) and the inner loop
// the source columns (x); only the target address computation differs.
1394 template <
typename T,
unsigned int tChannels>
1395 template <FrameTransposer::FlipDirection tFlipDirection>
1398 ocean_assert(sourceBlock && targetBlock);
// NOTE(review): the strides are compared against the pixel count (8u), not 8u * tChannels;
// the check is therefore weaker than in the NEON variants - confirm whether this is intended
1399 ocean_assert(sourceStrideElements >= 8u && targetStrideElements >= 8u);
1403 switch (tFlipDirection)
// first case (label not visible; presumably FD_NONE): source(x, y) -> target(column y, row x)
1409 for (
unsigned int y = 0u; y < 8u; ++y)
1411 const PixelType*
const sourcePixel = (
const PixelType*)sourceBlock;
1413 for (
unsigned int x = 0u; x < 8u; ++x)
1415 *((PixelType*)(targetBlock + targetStrideElements * x)) = sourcePixel[x];
// next source row, next target column
1418 sourceBlock += sourceStrideElements;
1419 targetBlock += tChannels;
// second case (presumably FD_LEFT_RIGHT): source(x, y) -> target(column 7 - y, row x);
// targetBlock stays fixed, the column is selected by the PixelType index
1429 for (
unsigned int y = 0u; y < 8u; ++y)
1431 const PixelType*
const sourcePixel = (
const PixelType*)sourceBlock;
1433 for (
unsigned int x = 0u; x < 8u; ++x)
1435 *((PixelType*)(targetBlock + targetStrideElements * x) + (8u - y - 1u)) = sourcePixel[x];
1438 sourceBlock += sourceStrideElements;
// third case (presumably FD_TOP_BOTTOM): source(x, y) -> target(column y, row 7 - x)
1448 for (
unsigned int y = 0u; y < 8u; ++y)
1450 const PixelType*
const sourcePixel = (
const PixelType*)sourceBlock;
1452 for (
unsigned int x = 0u; x < 8u; ++x)
1454 *((PixelType*)(targetBlock + targetStrideElements * (8u - x - 1u)) + y) = sourcePixel[x];
1457 sourceBlock += sourceStrideElements;
// fallback for an unsupported flip direction (presumably the default branch): debug-only failure
1464 ocean_assert(
false &&
"Invalid flip direction!");
// Generic (non-SIMD) transpose of a blockWidth x blockHeight pixel block, optionally combined
// with a flip. The asserts below require the block to be smaller than 8 pixels in at least one
// dimension (full 8x8 blocks have a dedicated function).
// NOTE(review): the function signature line, the braces, the 'case' labels and the definition of
// PixelType are not part of this excerpt. PixelType presumably covers one whole pixel
// (tChannels elements of T), as the target pointer advances by tChannels per pixel - TODO confirm.
//
// The source block has blockWidth columns and blockHeight rows; the written target block has
// blockHeight columns and blockWidth rows. Strides are given in elements.
1468 template <
typename T,
unsigned int tChannels>
1469 template <FrameTransposer::FlipDirection tFlipDirection>
1472 ocean_assert(sourceBlock && targetBlock);
1474 ocean_assert(blockWidth >= 1u && blockHeight >= 1u);
// at least one dimension must be below 8 pixels
1475 ocean_assert(blockWidth < 8u || blockHeight < 8u);
// NOTE(review): both checks compare a stride in elements with a size in pixels (no factor
// tChannels), so they are only a lower bound - confirm whether this is intended
1477 ocean_assert(sourceStrideElements >= blockWidth);
1478 ocean_assert(targetStrideElements >= blockHeight);
1482 switch (tFlipDirection)
// first case (label not visible; presumably FD_NONE): source(x, y) -> target(column y, row x)
1488 for (
unsigned int y = 0u; y < blockHeight; ++y)
1490 const PixelType*
const sourcePixel = (
const PixelType*)sourceBlock;
1492 for (
unsigned int x = 0u; x < blockWidth; ++x)
1494 *((PixelType*)(targetBlock + targetStrideElements * x)) = sourcePixel[x];
// next source row, next target column
1497 sourceBlock += sourceStrideElements;
1498 targetBlock += tChannels;
// second case (presumably FD_LEFT_RIGHT): source(x, y) -> target(column blockHeight - 1 - y, row x)
1508 for (
unsigned int y = 0u; y < blockHeight; ++y)
1510 const PixelType*
const sourcePixel = (
const PixelType*)sourceBlock;
1512 for (
unsigned int x = 0u; x < blockWidth; ++x)
1514 *((PixelType*)(targetBlock + targetStrideElements * x) + (blockHeight - y - 1u)) = sourcePixel[x];
1517 sourceBlock += sourceStrideElements;
// third case (presumably FD_TOP_BOTTOM): source(x, y) -> target(column y, row blockWidth - 1 - x)
1527 for (
unsigned int y = 0u; y < blockHeight; ++y)
1529 const PixelType*
const sourcePixel = (
const PixelType*)sourceBlock;
1531 for (
unsigned int x = 0u; x < blockWidth; ++x)
1533 *((PixelType*)(targetBlock + targetStrideElements * (blockWidth - x - 1u)) + y) = sourcePixel[x];
1536 sourceBlock += sourceStrideElements;
// fallback for an unsupported flip direction (presumably the default branch): debug-only failure
1543 ocean_assert(
false &&
"Invalid flip direction!");
// Rotates a subset of an image by 90 degrees, writing the target rows
// [firstTargetRow, firstTargetRow + numberTargetRows) pixel by pixel (no block or SIMD processing).
// NOTE(review): the braces and the branch statements selecting between the two loops below are
// not part of this excerpt; presumably the first loop is the 'clockwise' branch and the second
// loop the counter-clockwise branch.
//
// source                 Top-left element of the source image, must be valid
// target                 Top-left element of the target image, must be valid
// sourceWidth            Width of the source image in pixels (equals the height of the target), >= 1
// sourceHeight           Height of the source image in pixels (equals the width of the target), >= 1
// clockwise              Selects the rotation direction
// sourcePaddingElements  Number of padding elements at the end of each source row
// targetPaddingElements  Number of padding elements at the end of each target row
// firstTargetRow         First target row to be written
// numberTargetRows       Number of target rows to be written; firstTargetRow + numberTargetRows
//                        must not exceed sourceWidth
1547 template <
typename TElementType,
unsigned int tChannels>
1548 inline void FrameTransposer::rotate90Subset(
const TElementType* source, TElementType* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const bool clockwise,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows)
1550 static_assert(tChannels >= 1u,
"Invalid channel number!");
1552 ocean_assert(source && target);
1553 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
// the target image has sourceWidth rows
1555 ocean_assert(firstTargetRow + numberTargetRows <= sourceWidth);
// a 90 degree rotation swaps width and height; targetWidth is just an alias for sourceHeight
1557 const unsigned int& targetWidth = sourceHeight;
1573 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
1574 const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
// first element of the first target row handled by this call
1576 TElementType* targetRowStartElement = target + firstTargetRow * targetStrideElements;
// one past the last payload element of the last handled target row (padding excluded), used by asserts only
1577 const TElementType*
const targetEndElement = targetRowStartElement + numberTargetRows * targetStrideElements - targetPaddingElements;
1578 ocean_assert_and_suppress_unused(targetRowStartElement < targetEndElement || numberTargetRows == 0u, targetEndElement);
// --- first loop (presumably clockwise): target row r is source column r, read bottom-to-top ---
// start at the bottom-most pixel of source column 'firstTargetRow'
1582 const TElementType* sourceColumnStartElement = source + (sourceHeight - 1u) * sourceStrideElements + tChannels * firstTargetRow;
1584 for (
unsigned row = 0u; row < numberTargetRows; ++row)
1586 const TElementType* sourceElement = sourceColumnStartElement;
1588 TElementType* targetElement = targetRowStartElement;
1589 const TElementType*
const targetRowEndElement = targetRowStartElement + tChannels * targetWidth;
1590 ocean_assert(targetRowEndElement <= targetEndElement);
// copy one source column into one target row
1592 while (targetElement != targetRowEndElement)
1594 ocean_assert(sourceElement < source + sourceHeight * sourceStrideElements - sourcePaddingElements);
1595 ocean_assert(targetElement < targetEndElement);
1596 ocean_assert(targetElement < targetRowEndElement);
// copy all channels of the current pixel
1598 for (
unsigned int c = 0u; c < tChannels; ++c)
1600 targetElement[c] = sourceElement[c];
// one source row up, one target pixel to the right
1603 sourceElement -= sourceStrideElements;
1604 targetElement += tChannels;
// next source column (to the right), next target row
1607 sourceColumnStartElement += tChannels;
1608 targetRowStartElement += targetStrideElements;
// --- second loop (presumably counter-clockwise): target row r is source column
// (sourceWidth - 1 - r), read top-to-bottom ---
// start at the top-most pixel of source column 'sourceWidth - firstTargetRow - 1'
1613 const TElementType* sourceColumnStartElement = source + tChannels * (sourceWidth - firstTargetRow - 1u);
1615 for (
unsigned row = 0u; row < numberTargetRows; ++row)
1617 const TElementType* sourceElement = sourceColumnStartElement;
1618 ocean_assert(sourceElement >= source);
1620 TElementType* targetElement = targetRowStartElement;
1621 const TElementType*
const targetRowEndElement = targetRowStartElement + tChannels * targetWidth;
1622 ocean_assert(targetRowEndElement <= targetEndElement);
// copy one source column into one target row
1624 while (targetElement != targetRowEndElement)
1626 ocean_assert(sourceElement < source + sourceHeight * sourceStrideElements - sourcePaddingElements);
1627 ocean_assert(targetElement < targetEndElement);
1628 ocean_assert(targetElement < targetRowEndElement);
// copy all channels of the current pixel
1630 for (
unsigned int c = 0u; c < tChannels; ++c)
1632 targetElement[c] = sourceElement[c];
// one source row down, one target pixel to the right
1635 sourceElement += sourceStrideElements;
1636 targetElement += tChannels;
// next source column (to the left), next target row
1639 sourceColumnStartElement -= tChannels;
1640 targetRowStartElement += targetStrideElements;
@ CONVERT_FLIPPED_AND_MIRRORED
Rotated conversion, rotates the image by 180.0 degrees with anchor in the center of the image.
Definition: FrameConverter.h:82
Helper class for functions transposing blocks.
Definition: FrameTransposer.h:122
static OCEAN_FORCE_INLINE void transposeBlock(const T *sourceBlock, T *targetBlock, const unsigned int blockWidth, const unsigned int blockHeight, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
Transposes a block of n x m pixels.
Definition: FrameTransposer.h:1470
static OCEAN_FORCE_INLINE void transposeBlock4x4NEON(const T *sourceBlock, T *targetBlock, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
Transposes a block of 4x4 pixels.
static OCEAN_FORCE_INLINE void transposeBlock8x8(const T *sourceBlock, T *targetBlock, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
Transposes a block of 8x8 pixels.
Definition: FrameTransposer.h:1396
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition: FrameTransposer.h:39
static bool rotate(const Frame &input, Frame &output, const int angle, Worker *worker=nullptr)
Rotates a given frame with 90 degree steps.
static bool rotate180(const Frame &input, Frame &output, Worker *worker=nullptr)
Rotates a given frame by 180 degrees.
static bool rotate90(const Frame &input, Frame &output, const bool clockwise, Worker *worker=nullptr)
Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
This class implements a frame transposer.
Definition: FrameTransposer.h:30
static void rotate90Subset(const TElementType *source, TElementType *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Rotates a subset of a given frame either clockwise or counter-clockwise by 90 degree.
Definition: FrameTransposer.h:1548
static void transposeSubset(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstSourceRow, const unsigned int numberSourceRows)
Transposes the subset of a given image buffer.
Definition: FrameTransposer.h:519
static bool transpose(const Frame &source, Frame &target, Worker *worker=nullptr)
Transposes a given frame.
FlipDirection
Definition of individual flip directions which can be applied to a transposed frame.
Definition: FrameTransposer.h:105
@ FD_NONE
Applying no flip.
Definition: FrameTransposer.h:107
@ FD_TOP_BOTTOM
Applying a top-bottom flip, combined with a transpose operation an image can be rotated counter clock...
Definition: FrameTransposer.h:111
@ FD_LEFT_RIGHT
Applying a left-right flip like a mirror, combined with a transpose operation an image can be rotated...
Definition: FrameTransposer.h:109
static void rotate180(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rotates a given image buffer 180 degrees.
Definition: FrameTransposer.h:455
static void rotate90(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rotates a given image buffer 90 degrees clockwise or counter clockwise.
Definition: FrameTransposer.h:395
static bool rotate(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const int angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rotates a given image with 90 degree steps.
Definition: FrameTransposer.h:468
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition: Caller.h:2876
This class implements Ocean's image class.
Definition: Frame.h:1792
void setRelativeTimestamp(const Timestamp &relative)
Sets the relative timestamp of this frame.
Definition: Frame.h:4153
bool isValid() const
Returns whether this frame is valid.
Definition: Frame.h:4448
void setTimestamp(const Timestamp &timestamp)
Sets the timestamp of this frame.
Definition: Frame.h:4148
const Timestamp & timestamp() const
Returns the timestamp of this frame.
Definition: Frame.h:4138
const Timestamp & relativeTimestamp() const
Returns the relative timestamp of this frame.
Definition: Frame.h:4143
TypeMapperBySize< sizeof(T)>::Type Type
Definition of an invalid mapped data type.
Definition: DataType.h:501
This class implements a worker able to distribute function calls over different threads.
Definition: Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15
Default definition of a type with tBytes bytes.
Definition: DataType.h:32