8#ifndef META_OCEAN_CV_FRAME_CHANNELS_H
9#define META_OCEAN_CV_FRAME_CHANNELS_H
/// Value 0, used as the default of the tChannels template parameter of separateTo1Channel() and zipChannels();
/// as the name says, it marks a channel count that is not known at compile time.
37 static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME = 0u;
/**
 * Function pointer type of a row callback: it receives a source row pointer, a target row pointer,
 * the frame width and height, a row index and the strides (in elements) of source and target.
 * applyRowOperator() and applyRowOperatorSubset() accept a pointer of this type.
 * The channel counts tSourceChannels and tTargetChannels are part of the alias but do not appear in the pointed-to signature.
 * NOTE(review): presumably the callback is invoked once per row, with rowIndex identifying that row - confirm against applyRowOperatorSubset().
 */
42 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
43 using RowOperatorFunction = void(*)(
const TSource* sourceRow, TTarget* targetRow,
const unsigned int width,
const unsigned int height,
unsigned int rowIndex,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements);
/**
 * Declares the frame-level function that splits an interleaved source frame into one target plane per channel.
 * The uint8_t specializations for 2 and 3 channels defined further down in this file write channel c of every source pixel to targetFrames[c].
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target planes
 * @tparam tChannels Channel count if known at compile time, defaults to CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
 * @param sourceFrame The interleaved source frame
 * @param targetFrames Array holding one target pointer per channel
 * @param width Frame width in pixels; the visible specializations assert that it is not zero
 * @param height Frame height in pixels; the visible specializations assert that it is not zero
 * @param channels Number of channels; the visible specializations assert that it equals their tChannels
 * @param sourceFramePaddingElements Elements skipped at the end of each source row
 * @param targetFramesPaddingElements Per-target padding at the end of each row; the visible specializations treat nullptr as "no padding"
 */
207 template <
typename TSource,
typename TTarget,
unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
208 static void separateTo1Channel(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements);
/**
 * Overload of separateTo1Channel() that takes the target pointers and their paddings as initializer lists;
 * it has neither a tChannels template parameter nor an explicit channel count argument.
 * NOTE(review): presumably the channel count is derived from targetFrames.size() - confirm against the definition.
 * NOTE(review): the padding list is std::initializer_list<const unsigned int> here, whereas the corresponding
 * zipChannels() overload uses std::initializer_list<unsigned int> - confirm that the difference is intended.
 */
236 template <
typename TSource,
typename TTarget>
237 static void separateTo1Channel(
const TSource*
const sourceFrame,
const std::initializer_list<TTarget*>& targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int sourceFramePaddingElements,
const std::initializer_list<const unsigned int>& targetFramesPaddingElements);
/**
 * Declares the frame-level function taking an array of source frames and a single target frame.
 * NOTE(review): by name and parameter layout this looks like the inverse of separateTo1Channel()
 * (several single-channel planes in, one interleaved frame out) - confirm against the definition.
 * @tparam TSource Element type of the source frames
 * @tparam TTarget Element type of the target frame
 * @tparam tChannels Channel count if known at compile time, defaults to CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
 * @param sourceFrames Array of source frame pointers
 * @param targetFrame The target frame
 * @param width Frame width, presumably in pixels
 * @param height Frame height, presumably in pixels
 * @param channels Number of channels
 * @param sourceFramesPaddingElements Array of padding values, presumably one per source frame; TODO confirm whether nullptr is allowed
 * @param targetFramePaddingElements Padding of the target frame, in elements
 */
265 template <
typename TSource,
typename TTarget,
unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
266 static void zipChannels(
const TSource*
const*
const sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
/**
 * Overload of zipChannels() that takes the source pointers and their paddings as initializer lists;
 * it has neither a tChannels template parameter nor an explicit channel count argument.
 * NOTE(review): presumably the channel count is derived from sourceFrames.size() - confirm against the definition.
 */
294 template <
typename TSource,
typename TTarget>
295 static void zipChannels(
const std::initializer_list<const TSource*>& sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const std::initializer_list<unsigned int>& sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
/**
 * Declares a frame-level function with two inputs (source and sourceNewChannel) and one output (target), all of element type T.
 * NOTE(review): presumably target receives the pixels of source with the values of sourceNewChannel inserted as new first channel - confirm.
 * @tparam T Element type shared by all three frames
 * @tparam tSourceChannels Number of channels of source
 * @param conversionFlag Project-declared ConversionFlag; its effect is not visible in this section
 * @param sourcePaddingElements Padding of source, in elements
 * @param sourceNewChannelPaddingElements Padding of sourceNewChannel, in elements
 * @param targetPaddingElements Padding of target, in elements
 * @param worker Optional, defaults to nullptr; presumably used to distribute the computation - confirm
 */
312 template <
typename T,
unsigned int tSourceChannels>
313 static inline void addFirstChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Same parameter layout as addFirstChannel(), except that a single value (newChannelValue) replaces the second source frame and its padding.
 * NOTE(review): presumably every target pixel gets newChannelValue as its new first channel - confirm.
 * @tparam T Element type of source, target and the new value
 * @tparam tSourceChannels Number of channels of source
 * @param worker Optional, defaults to nullptr
 */
329 template <
typename T,
unsigned int tSourceChannels>
330 static inline void addFirstChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declares a frame-level function with the same signature as addFirstChannel().
 * NOTE(review): presumably the values of sourceNewChannel are appended as new last channel instead of being inserted in front - confirm.
 * @tparam T Element type shared by all three frames
 * @tparam tSourceChannels Number of channels of source
 * @param worker Optional, defaults to nullptr
 */
347 template <
typename T,
unsigned int tSourceChannels>
348 static inline void addLastChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declares a frame-level function with the same signature as addFirstChannelValue().
 * NOTE(review): presumably every target pixel gets newChannelValue as its new last channel - confirm.
 * @tparam T Element type of source, target and the new value
 * @tparam tSourceChannels Number of channels of source
 * @param worker Optional, defaults to nullptr
 */
364 template <
typename T,
unsigned int tSourceChannels>
365 static inline void addLastChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declares a frame-level source-to-target function parameterized by the source channel count.
 * NOTE(review): presumably target receives each source pixel without its first channel (tSourceChannels - 1 channels) - confirm.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Number of channels of source
 * @param worker Optional, defaults to nullptr
 */
382 template <
typename T,
unsigned int tSourceChannels>
383 static inline void removeFirstChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declares a frame-level function with the same signature as removeFirstChannel().
 * NOTE(review): presumably target receives each source pixel without its last channel - confirm.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Number of channels of source
 * @param worker Optional, defaults to nullptr
 */
400 template <
typename T,
unsigned int tSourceChannels>
401 static inline void removeLastChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declares a frame-level function whose source and target channel counts and channel indices are all template parameters.
 * Unlike most neighbouring functions it takes no ConversionFlag.
 * NOTE(review): presumably channel tSourceChannelIndex of source is copied into channel tTargetChannelIndex of target,
 * leaving the other target channels untouched - confirm.
 * @param worker Optional, defaults to nullptr
 */
418 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
419 static inline void copyChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declares an in-place frame function: it takes one mutable frame pointer and a value of the frame's element type.
 * NOTE(review): presumably channel tChannel of every pixel is set to value - confirm against setChannelSubset().
 * @tparam T Element type of the frame
 * @tparam tChannel Index of the affected channel
 * @tparam tChannels Number of channels of the frame
 * @param worker Optional, defaults to nullptr
 */
433 template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
434 static inline void setChannel(T* frame,
const unsigned int width,
const unsigned int height,
const T value,
const unsigned int framePaddingElements,
Worker* worker =
nullptr);
/**
 * Declares a frame-level source-to-target function where both frames share the element type T and the channel count tChannels.
 * NOTE(review): presumably the channels of each pixel are written in reversed order - confirm.
 * @param conversionFlag Project-declared ConversionFlag; its effect is not visible in this section
 * @param worker Optional, defaults to nullptr
 */
450 template <
typename T,
unsigned int tChannels>
451 static inline void reverseChannelOrder(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declares a frame-level source-to-target function with independent source and target channel counts and a compile-time shuffle pattern.
 * NOTE(review): the encoding of tShufflePattern is not visible in this section - document it once confirmed from the definition.
 * @param conversionFlag Project-declared ConversionFlag; its effect is not visible in this section
 * @param worker Optional, defaults to nullptr
 */
477 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
478 static inline void shuffleChannels(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Same template parameters and arguments as shuffleChannels(), plus an additional value newChannelValue of type T.
 * NOTE(review): presumably the last target channel is set to newChannelValue while the others follow tShufflePattern - confirm.
 * @param worker Optional, defaults to nullptr
 */
505 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
506 static inline void shuffleChannelsAndSetLastChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declares a frame-level function reading uint16_t elements and writing uint8_t elements, with tChannels channels per pixel.
 * NOTE(review): how the 16 bit values are reduced to 8 bit (shift, scale or clamp) is not visible in this section - confirm.
 * @param conversionFlag Project-declared ConversionFlag; its effect is not visible in this section
 * @param worker Optional, defaults to nullptr
 */
520 template <
unsigned int tChannels>
521 static inline void narrow16BitPerChannelTo8BitPerChannel(
const uint16_t* source, uint8_t* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declares a frame-level function parameterized by a compile-time function pointer tPixelFunction of type void(const T*, T*).
 * Source and target share the element type T and the channel count tChannels; there are no padding parameters,
 * in contrast to applyAdvancedPixelModifier().
 * NOTE(review): presumably tPixelFunction is called once per pixel with the source and target pixel pointers,
 * and both frames are expected to be continuous - confirm.
 * @param worker Optional, defaults to nullptr
 */
535 template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
536 static void applyPixelModifier(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
Worker* worker =
nullptr);
/**
 * Variant of applyPixelModifier() with separate element types and channel counts for source and target and explicit padding parameters.
 * Note the argument order: the paddings precede conversionFlag here, unlike in most other frame-level functions of this section.
 * NOTE(review): presumably tPixelFunction is called once per pixel - confirm.
 * @param worker Optional, defaults to nullptr
 */
554 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
555 static void applyAdvancedPixelModifier(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
Worker* worker =
nullptr);
/**
 * Declares a frame-level function with two source frames, one target frame and a compile-time operator
 * tOperator of type void(const TSource0*, const TSource1*, TTarget*).
 * TIntermediate is a template parameter that does not appear in the function signature.
 * NOTE(review): presumably tOperator is applied per pixel and TIntermediate is the type of intermediate results inside the operator - confirm.
 * @param worker Optional, defaults to nullptr
 */
578 template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
579 static void applyBivariateOperator(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
Worker* worker =
nullptr);
/**
 * Declares a frame-level function that takes a run-time RowOperatorFunction pointer (by const reference).
 * This function takes paddings, whereas RowOperatorFunction and applyRowOperatorSubset() take strides.
 * NOTE(review): presumably the strides are derived from width, channel count and padding before the callback is invoked - confirm.
 * @param rowOperatorFunction The row callback, see RowOperatorFunction
 * @param worker Optional, defaults to nullptr
 */
598 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
599 static void applyRowOperator(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction,
Worker* worker =
nullptr);
/**
 * Declares a frame-level source-to-target function where both frames share element type and channel count.
 * Unlike the neighbouring declarations, the worker parameter has no default value here.
 * NOTE(review): presumably this copies the frame while applying the transformation selected by conversionFlag - confirm.
 */
615 template <
typename T,
unsigned int tChannels>
616 static inline void transformGeneric(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker);
/**
 * In-place overload: operates on a single mutable uint8_t frame.
 * NOTE(review): presumably converts premultiplied-alpha pixels to straight alpha, using channel tAlphaChannelIndex as alpha - confirm;
 * the handling of alpha == 0 is not visible in this section.
 * @tparam tChannels Number of channels of the frame
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param worker Optional, defaults to nullptr
 */
629 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
630 static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t*
const frame,
const unsigned int width,
const unsigned int height,
const unsigned int framePaddingElements,
Worker* worker =
nullptr);
/**
 * Source-to-target overload: reads a const uint8_t frame and writes a separate uint8_t frame, each with its own padding.
 * NOTE(review): presumably the same conversion as the in-place overload - confirm.
 * @tparam tChannels Number of channels of both frames
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param worker Optional, defaults to nullptr
 */
645 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
646 static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * In-place overload: operates on a single mutable uint8_t frame.
 * NOTE(review): presumably converts straight-alpha pixels to premultiplied alpha, using channel tAlphaChannelIndex as alpha - confirm.
 * @tparam tChannels Number of channels of the frame
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param worker Optional, defaults to nullptr
 */
659 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
660 static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t*
const frame,
const unsigned int width,
const unsigned int height,
const unsigned int framePaddingElements,
Worker* worker =
nullptr);
/**
 * Source-to-target overload: reads a const uint8_t frame and writes a separate uint8_t frame, each with its own padding.
 * NOTE(review): presumably the same conversion as the in-place overload - confirm.
 * @tparam tChannels Number of channels of both frames
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param worker Optional, defaults to nullptr
 */
675 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
676 static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Row-level function: takes a source pointer, a target pointer and a size, without any padding or height.
 * NOTE(review): presumably writes the pixels of one row in reversed order and size counts pixels (not elements) - confirm.
 */
686 template <
typename T,
unsigned int tChannels>
687 static void reverseRowPixelOrder(
const T* source, T* target,
const size_t size);
/**
 * Row-level in-place function: takes one mutable data pointer and a size.
 * NOTE(review): presumably reverses the pixel order within the row and size counts pixels - confirm.
 */
696 template <
typename T,
unsigned int tChannels>
697 static void reverseRowPixelOrderInPlace(T* data,
const size_t size);
/**
 * Row-level counterpart (by name) of reverseChannelOrder().
 * The trailing const void* parameter defaults to nullptr and, going by its name unusedOptions, is not used;
 * NOTE(review): presumably it exists so that all row functions share one function pointer signature - confirm.
 */
708 template <
typename T,
unsigned int tChannels>
709 static void reverseRowChannelOrder(
const T* source, T* target,
const size_t size,
const void* unusedOptions =
nullptr);
/**
 * Row-level counterpart (by name and template parameters) of shuffleChannels().
 * The trailing const void* parameter defaults to nullptr and, going by its name unusedOptions, is not used.
 * NOTE(review): the encoding of tShufflePattern is not visible in this section - confirm against the definition.
 */
731 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
732 static inline void shuffleRowChannels(
const T* source, T* target,
const size_t size,
const void* unusedOptions =
nullptr);
/**
 * Row-level counterpart (by name and template parameters) of shuffleChannelsAndSetLastChannelValue().
 * Here the trailing const void* parameter is named options rather than unusedOptions, although it still defaults to nullptr.
 * NOTE(review): presumably options points to the value for the last target channel and must not be nullptr in practice - confirm.
 */
754 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
755 static inline void shuffleRowChannelsAndSetLastChannelValue(
const T* source, T* target,
const size_t size,
const void* options =
nullptr);
/**
 * Row-level function on uint8_t data with three compile-time switches and an untyped pointer to multiplication factors.
 * NOTE(review): presumably each target value is a weighted sum of the three source channels with factors scaled by 128 (7 bit),
 * and a false tUseFactorChannelN means channel N is skipped - confirm; the element type behind channelMultiplicationFactors_128
 * is not visible in this section.
 */
770 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
771 static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(
const uint8_t* source, uint8_t* target,
const size_t size,
const void* channelMultiplicationFactors_128);
/**
 * Four-channel sibling of convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(): same arguments, one more compile-time switch.
 * NOTE(review): presumably each target value is a weighted sum of the four source channels with factors scaled by 128 - confirm.
 */
872 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
873 static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(
const uint8_t* source, uint8_t* target,
const size_t size,
const void* channelMultiplicationFactors_128);
/**
 * Row-level counterpart (by name) of narrow16BitPerChannelTo8BitPerChannel(): reads uint16_t, writes uint8_t.
 * The trailing const void* parameter defaults to nullptr and, going by its name unusedParameters, is not used.
 * NOTE(review): the narrowing rule (shift, scale or clamp) is not visible in this section - confirm.
 */
920 template <
unsigned int tChannels>
921 static void narrowRow16BitPerChannelTo8BitPerChannel(
const uint16_t* source, uint8_t* target,
const size_t size,
const void* unusedParameters =
nullptr);
/**
 * Row-level function working on untyped pointer arrays (sources, targets) instead of typed row pointers.
 * NOTE(review): presumably sources holds the frame and the new-channel plane, targets holds the output frame,
 * and multipleRowIndex selects the row to process; tAddToFront presumably chooses between first and last channel - confirm.
 * NOTE(review): the layout expected behind options (e.g. padding values) is not visible in this section.
 */
937 template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
938 static void addChannelRow(
const void** sources,
void** targets,
const unsigned int multipleRowIndex,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const void* options);
/**
 * Row-level function taking typed source/target rows, a size and an untyped pointer channelValueParameter.
 * NOTE(review): presumably channelValueParameter points to a value of type T that becomes the new first
 * (tAddToFront == true) or last channel of every pixel - confirm.
 */
951 template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
952 static void addChannelValueRow(
const T* source, T* target,
const size_t size,
const void* channelValueParameter);
/**
 * Row-level counterpart (by name and template parameters) of copyChannel().
 * The trailing const void* parameter defaults to nullptr and, going by its name unusedParameters, is not used.
 */
966 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
967 static void copyChannelRow(
const T* source, T* target,
const size_t size,
const void* unusedParameters =
nullptr);
/**
 * Same argument list as the pointer-based separateTo1Channel(), but without the tChannels template parameter.
 * NOTE(review): presumably this is the fallback used when the channel count is only known at run time - confirm.
 */
983 template <
typename TSource,
typename TTarget>
984 static void separateTo1ChannelRuntime(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements);
/**
 * Same argument list as the pointer-based zipChannels(), but without the tChannels template parameter.
 * NOTE(review): presumably this is the fallback used when the channel count is only known at run time - confirm.
 */
998 template <
typename TSource,
typename TTarget>
999 static void zipChannelsRuntime(
const TSource*
const*
const sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
/**
 * Row-range variant of setChannel(): it has no height and no worker parameter but takes firstRow and numberRows instead.
 * NOTE(review): presumably processes rows [firstRow, firstRow + numberRows) and is the unit of work handed to a Worker - confirm.
 */
1013 template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
1014 static void setChannelSubset(T* frame,
const unsigned int width,
const T value,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Row-range variant of applyPixelModifier(): the worker parameter is replaced by firstRow and numberRows.
 * NOTE(review): presumably processes rows [firstRow, firstRow + numberRows) - confirm.
 */
1029 template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
1030 static void applyPixelModifierSubset(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Row-range variant of applyAdvancedPixelModifier(): the worker parameter is replaced by firstRow and numberRows.
 * NOTE(review): presumably processes rows [firstRow, firstRow + numberRows) - confirm.
 */
1049 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
1050 static void applyAdvancedPixelModifierSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Row-range variant of applyBivariateOperator(): the worker parameter is replaced by firstRow and numberRows.
 * As in applyBivariateOperator(), TIntermediate does not appear in the function signature.
 * NOTE(review): presumably processes rows [firstRow, firstRow + numberRows) - confirm.
 */
1073 template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
1074 static void applyBivariateOperatorSubset(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Row-range variant of applyRowOperator(). It takes strides (not paddings) and receives the RowOperatorFunction by value.
 * NOTE(review): presumably calls rowOperatorFunction for every row in [firstRow, firstRow + numberRows) - confirm.
 */
1094 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
1095 static void applyRowOperatorSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Non-template, byte-based row-range function: frames are uint8_t pointers, and row size and strides are given in bytes.
 * RowReversePixelOrderFunction is not declared in this section.
 * NOTE(review): presumably this is the type-erased worker behind transformGeneric(), with rowReversePixelOrderFunction
 * being used when conversionFlag requests mirroring - confirm.
 */
1111 static void transformGenericSubset(
const uint8_t* source, uint8_t* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const RowReversePixelOrderFunction<void> rowReversePixelOrderFunction,
const unsigned int bytesPerRow,
const unsigned int sourceStrideBytes,
const unsigned int targetStrideBytes,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Row-range variant of the in-place premultipliedAlphaToStraightAlpha8BitPerChannel(): no height and no worker, but firstRow and numberRows.
 * NOTE(review): presumably processes rows [firstRow, firstRow + numberRows) - confirm.
 */
1123 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1124 static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t*
const frame,
const unsigned int width,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Row-range variant of the source-to-target premultipliedAlphaToStraightAlpha8BitPerChannel(): no height and no worker, but firstRow and numberRows.
 * NOTE(review): presumably processes rows [firstRow, firstRow + numberRows) - confirm.
 */
1138 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1139 static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Row-range variant of the in-place straightAlphaToPremultipliedAlpha8BitPerChannel(): no height and no worker, but firstRow and numberRows.
 * NOTE(review): presumably processes rows [firstRow, firstRow + numberRows) - confirm.
 */
1151 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1152 static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t*
const frame,
const unsigned int width,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Row-range variant of the source-to-target straightAlphaToPremultipliedAlpha8BitPerChannel(): no height and no worker, but firstRow and numberRows.
 * NOTE(review): presumably processes rows [firstRow, firstRow + numberRows) - confirm.
 */
1166 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1167 static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
1169#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
/**
 * SSE helper declared after the OCEAN_HARDWARE_SSE_VERSION >= 41 guard; takes source/target pointers and three __m128i factor registers.
 * NOTE(review): presumably converts one block of 16 three-channel pixels to 16 single-channel pixels using factors scaled by 128 - confirm.
 * NOTE(review): the suffix _128_u_16x8 presumably means "scaled by 128, unsigned, 8 lanes of 16 bit" - confirm the naming convention.
 */
1182 static OCEAN_FORCE_INLINE
void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& multiplicationFactors0_128_u_16x8,
const __m128i& multiplicationFactors1_128_u_16x8,
const __m128i& multiplicationFactors2_128_u_16x8);
/**
 * SSE helper taking nine factor registers (factorChannelXY) and three bias registers.
 * NOTE(review): presumably applies a 3x3 channel matrix plus bias to a block of 16 pixels with factors scaled by 128;
 * whether the two digits in factorChannelXY mean (target, source) or (source, target), and whether the bias is added
 * before or after the multiplication, is not visible in this section - confirm.
 */
1211 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_128_s_16x8,
const __m128i& factorChannel10_128_s_16x8,
const __m128i& factorChannel20_128_s_16x8,
const __m128i& factorChannel01_128_s_16x8,
const __m128i& factorChannel11_128_s_16x8,
const __m128i& factorChannel21_128_s_16x8,
const __m128i& factorChannel02_128_s_16x8,
const __m128i& factorChannel12_128_s_16x8,
const __m128i& factorChannel22_128_s_16x8,
const __m128i& biasChannel0_s_16x8,
const __m128i& biasChannel1_s_16x8,
const __m128i& biasChannel2_s_16x8);
/**
 * SSE helper with the same layout as the 7 bit variant, but with factor names carrying _1024_ and bias names carrying _1024_s_32x4.
 * NOTE(review): presumably a 3x3 channel matrix plus bias on 16 pixels with factors scaled by 1024 (10 bit) and
 * 32 bit bias lanes - confirm.
 */
1240 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_1024_s_16x8,
const __m128i& factorChannel10_1024_s_16x8,
const __m128i& factorChannel20_1024_s_16x8,
const __m128i& factorChannel01_1024_s_16x8,
const __m128i& factorChannel11_1024_s_16x8,
const __m128i& factorChannel21_1024_s_16x8,
const __m128i& factorChannel02_1024_s_16x8,
const __m128i& factorChannel12_1024_s_16x8,
const __m128i& factorChannel22_1024_s_16x8,
const __m128i& biasChannel0_1024_s_32x4,
const __m128i& biasChannel1_1024_s_32x4,
const __m128i& biasChannel2_1024_s_32x4);
/**
 * SSE helper with the same layout as the 7 bit variant, but with factor names carrying _64_.
 * NOTE(review): presumably a 3x3 channel matrix plus bias on 16 pixels with factors scaled by 64 (6 bit) - confirm.
 */
1269 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_64_s_16x8,
const __m128i& factorChannel10_64_s_16x8,
const __m128i& factorChannel20_64_s_16x8,
const __m128i& factorChannel01_64_s_16x8,
const __m128i& factorChannel11_64_s_16x8,
const __m128i& factorChannel21_64_s_16x8,
const __m128i& factorChannel02_64_s_16x8,
const __m128i& factorChannel12_64_s_16x8,
const __m128i& factorChannel22_64_s_16x8,
const __m128i& biasChannel0_s_16x8,
const __m128i& biasChannel1_s_16x8,
const __m128i& biasChannel2_s_16x8);
/**
 * SSE helper with the arguments of the 3-to-3 channel 6 bit variant plus one extra register channelValue3_u_8x16.
 * NOTE(review): presumably the first three target channels come from the 3x3 matrix plus bias and the fourth target
 * channel is filled from channelValue3_u_8x16 - confirm.
 */
1300 static OCEAN_FORCE_INLINE
void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_64_s_16x8,
const __m128i& factorChannel10_64_s_16x8,
const __m128i& factorChannel20_64_s_16x8,
const __m128i& factorChannel01_64_s_16x8,
const __m128i& factorChannel11_64_s_16x8,
const __m128i& factorChannel21_64_s_16x8,
const __m128i& factorChannel02_64_s_16x8,
const __m128i& factorChannel12_64_s_16x8,
const __m128i& factorChannel22_64_s_16x8,
const __m128i& biasChannel0_s_16x8,
const __m128i& biasChannel1_s_16x8,
const __m128i& biasChannel2_s_16x8,
const __m128i& channelValue3_u_8x16);
/**
 * SSE helper taking twelve factor registers (factorChannel00..factorChannel23) and three bias registers.
 * NOTE(review): presumably applies a 3x4 channel matrix plus bias to 16 four-channel pixels with factors scaled by 128 - confirm.
 */
1332 static OCEAN_FORCE_INLINE
void convert4ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_128_s_16x8,
const __m128i& factorChannel10_128_s_16x8,
const __m128i& factorChannel20_128_s_16x8,
const __m128i& factorChannel01_128_s_16x8,
const __m128i& factorChannel11_128_s_16x8,
const __m128i& factorChannel21_128_s_16x8,
const __m128i& factorChannel02_128_s_16x8,
const __m128i& factorChannel12_128_s_16x8,
const __m128i& factorChannel22_128_s_16x8,
const __m128i& factorChannel03_128_s_16x8,
const __m128i& factorChannel13_128_s_16x8,
const __m128i& factorChannel23_128_s_16x8,
const __m128i& biasChannel0_s_16x8,
const __m128i& biasChannel1_s_16x8,
const __m128i& biasChannel2_s_16x8);
/**
 * SSE helper taking a single factor register for all four source channels.
 * NOTE(review): presumably converts 16 four-channel pixels to 16 single-channel pixels with factors scaled by 128 - confirm.
 * NOTE(review): the parameter name ends in _32x without a lane count, unlike every other register parameter in this
 * section (e.g. _16x8, _32x4) - the name looks truncated, confirm the intended suffix.
 */
1343 static OCEAN_FORCE_INLINE
void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& multiplicationFactors0123_128_s_32x);
/**
 * SSE helper taking one factor register per target channel, each covering the four source channels (0123).
 * NOTE(review): presumably converts 16 four-channel pixels to 16 two-channel pixels with factors scaled by 128 - confirm.
 */
1355 static OCEAN_FORCE_INLINE
void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& multiplicationFactorsChannel0_0123_128_s_16x8,
const __m128i& multiplicationFactorsChannel1_0123_128_s_16x8);
1359#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
/**
 * NEON helper declared after the OCEAN_HARDWARE_NEON_VERSION >= 10 guard; takes nine int16x8_t factor registers and three uint8x8_t bias registers.
 * NOTE(review): the template parameters tUseFactorChannel0..2 directly above are not referenced anywhere in this
 * signature - confirm that the template header really belongs to this declaration and not to a different one.
 * NOTE(review): presumably applies a 3x3 channel matrix plus bias to 8 pixels with factors scaled by 64 (6 bit) - confirm.
 */
1375 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
1405 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8);
/**
 * NEON helper with the same parameter list as the 8 pixel 6 bit variant; only the name states 16 pixels.
 * NOTE(review): presumably applies a 3x3 channel matrix plus bias to 16 pixels with factors scaled by 64 - confirm.
 */
1434 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8);
/**
 * NEON helper taking nine int16x8_t factor registers and three int16x8_t bias registers, all named with _128_.
 * NOTE(review): presumably applies a 3x3 channel matrix plus bias to 8 pixels with factors and bias scaled by 128 (7 bit) - confirm.
 */
1463 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8);
/**
 * NEON helper taking nine int16x4_t factor registers and three int32x4_t bias registers, all named with _1024_.
 * NOTE(review): presumably applies a 3x3 channel matrix plus bias to 8 pixels with factors and bias scaled by 1024 (10 bit) - confirm.
 */
1492 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4);
/**
 * NEON helper with the same parameter list as the 8 pixel 10 bit variant; only the name states 16 pixels.
 * NOTE(review): presumably applies a 3x3 channel matrix plus bias to 16 pixels with factors and bias scaled by 1024 - confirm.
 */
1521 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4);
/**
 * NEON helper with the same parameter list as the 8 pixel 7 bit variant; only the name states 16 pixels.
 * NOTE(review): presumably applies a 3x3 channel matrix plus bias to 16 pixels with factors and bias scaled by 128 - confirm.
 */
1550 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8);
/**
 * NEON helper with the arguments of the 3-to-3 channel 6 bit variant plus one extra uint8x16_t register channelValue3_u_8x16.
 * NOTE(review): presumably the first three target channels come from the 3x3 matrix plus bias and the fourth target
 * channel is filled from channelValue3_u_8x16 - confirm.
 */
1582 static OCEAN_FORCE_INLINE
void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8,
const uint8x16_t& channelValue3_u_8x16);
/**
 * NEON helper taking eight uint8x8_t factor registers (factorChannel00..factorChannel13) and no bias.
 * NOTE(review): the template parameters tUseFactorChannel0..3 directly above are not referenced anywhere in this
 * signature - confirm that the template header really belongs to this declaration and not to a different one.
 * NOTE(review): presumably converts 8 four-channel pixels to 8 two-channel pixels with unsigned factors scaled by 128 - confirm.
 */
1600 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
1619 static OCEAN_FORCE_INLINE
void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const uint8x8_t& factorChannel00_128_u_8x8,
const uint8x8_t& factorChannel10_128_u_8x8,
const uint8x8_t& factorChannel01_128_u_8x8,
const uint8x8_t& factorChannel11_128_u_8x8,
const uint8x8_t& factorChannel02_128_u_8x8,
const uint8x8_t& factorChannel12_128_u_8x8,
const uint8x8_t& factorChannel03_128_u_8x8,
const uint8x8_t& factorChannel13_128_u_8x8);
/**
 * NEON helper taking twelve int16x8_t factor registers (factorChannel00..factorChannel23) and three int16x8_t bias registers.
 * NOTE(review): presumably applies a 3x4 channel matrix plus bias to 16 four-channel pixels with factors and bias scaled by 128 - confirm.
 */
1651 static OCEAN_FORCE_INLINE
void convert4ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& factorChannel03_128_s_16x8,
const int16x8_t& factorChannel13_128_s_16x8,
const int16x8_t& factorChannel23_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8);
1657#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
/**
 * NEON specialization of separateTo1Channel() for uint8_t frames with two interleaved channels
 * (it follows the OCEAN_HARDWARE_NEON_VERSION >= 10 guard).
 * Channel 0 of every source pixel is written to targetFrames[0], channel 1 to targetFrames[1].
 * Complete blocks of 16 pixels are de-interleaved with vld2q_u8 and stored with vst1q_u8; pixels that do not fill
 * a block are copied one at a time.
 * @param sourceFrame The interleaved source frame, must not be nullptr (asserted)
 * @param targetFrames Array with the two target plane pointers, must not be nullptr (asserted)
 * @param width Frame width in pixels, must not be zero (asserted)
 * @param height Frame height in pixels, must not be zero (asserted)
 * @param channels Must be 2 (asserted)
 * @param sourceFramePaddingElements Elements skipped at the end of each source row
 * @param targetFramesPaddingElements Per-target padding at the end of each row, nullptr is treated as zero padding for both targets
 */
1660inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 2u>(
const uint8_t*
const sourceFrame, uint8_t*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
1662 ocean_assert(sourceFrame !=
nullptr);
1663 ocean_assert(targetFrames !=
nullptr);
1665 ocean_assert(width != 0u && height != 0u);
1666 ocean_assert(channels == 2u);
1668 constexpr unsigned int tChannels = 2u;
// stays true only if none of the target planes has padding (a nullptr padding array counts as "no padding")
1670 bool allTargetFramesContinuous =
true;
1672 if (targetFramesPaddingElements !=
nullptr)
1674 for (
unsigned int n = 0u; n < tChannels; ++n)
1676 if (targetFramesPaddingElements[n] != 0u)
1678 allTargetFramesContinuous =
false;
// running pointers into the source frame and the two target planes
1684 const uint8_t* source = sourceFrame;
1685 uint8_t* target0 = targetFrames[0];
1686 uint8_t* target1 = targetFrames[1];
// number of pixels handled by one vld2q_u8 / vst1q_u8 round
1688 constexpr unsigned int tBlockSize = 16u;
1690 uint8x16x2_t source_8x16x2;
// no padding anywhere: the whole frame is handled as one run of width * height pixels
1692 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1694 const unsigned int pixels = width * height;
1695 const unsigned int blocks = pixels / tBlockSize;
1696 const unsigned int remaining = pixels % tBlockSize;
1698 for (
unsigned int n = 0u; n < blocks; ++n)
// val[0] holds the channel 0 values and val[1] the channel 1 values of the 16 loaded pixels
1700 source_8x16x2 = vld2q_u8(source);
1702 vst1q_u8(target0, source_8x16x2.val[0]);
1703 vst1q_u8(target1, source_8x16x2.val[1]);
1705 source += tBlockSize * tChannels;
1707 target0 += tBlockSize;
1708 target1 += tBlockSize;
// pixels that do not fill a complete block are copied individually
1711 for (
unsigned int n = 0u; n < remaining; ++n)
1713 target0[n] = source[n * tChannels + 0u];
1714 target1[n] = source[n * tChannels + 1u];
// NOTE(review): presumably the alternative branch of the condition above (at least one frame has padding) - confirm;
// the frame is handled row by row so that the padding at the end of each row can be skipped
1719 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
1720 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
1722 const unsigned int blocks = width / tBlockSize;
1723 const unsigned int remaining = width % tBlockSize;
1725 for (
unsigned int y = 0u; y < height; ++y)
1727 for (
unsigned int n = 0u; n < blocks; ++n)
1729 source_8x16x2 = vld2q_u8(source);
1731 vst1q_u8(target0, source_8x16x2.val[0]);
1732 vst1q_u8(target1, source_8x16x2.val[1]);
1734 source += tBlockSize * tChannels;
1736 target0 += tBlockSize;
1737 target1 += tBlockSize;
1740 for (
unsigned int n = 0u; n < remaining; ++n)
1742 target0[n] = source[n * tChannels + 0u];
1743 target1[n] = source[n * tChannels + 1u];
// the pointers were advanced by complete blocks only; skip the individually copied pixels plus the row padding
1746 source += remaining * tChannels + sourceFramePaddingElements;
1747 target0 += remaining + targetFrame0PaddingElements;
1748 target1 += remaining + targetFrame1PaddingElements;
/**
 * NEON specialization of separateTo1Channel() for uint8_t frames with three interleaved channels.
 * Channel c of every source pixel is written to targetFrames[c] for c = 0, 1, 2.
 * Complete blocks of 16 pixels are de-interleaved with vld3q_u8 and stored with vst1q_u8; pixels that do not fill
 * a block are copied one at a time.
 * @param sourceFrame The interleaved source frame, must not be nullptr (asserted)
 * @param targetFrames Array with the three target plane pointers, must not be nullptr (asserted)
 * @param width Frame width in pixels, must not be zero (asserted)
 * @param height Frame height in pixels, must not be zero (asserted)
 * @param channels Must be 3 (asserted)
 * @param sourceFramePaddingElements Elements skipped at the end of each source row
 * @param targetFramesPaddingElements Per-target padding at the end of each row, nullptr is treated as zero padding for all targets
 */
1754inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 3u>(
const uint8_t*
const sourceFrame, uint8_t*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
1756 ocean_assert(sourceFrame !=
nullptr);
1757 ocean_assert(targetFrames !=
nullptr);
1759 ocean_assert(width != 0u && height != 0u);
1760 ocean_assert(channels == 3u);
1762 constexpr unsigned int tChannels = 3u;
// stays true only if none of the target planes has padding (a nullptr padding array counts as "no padding")
1764 bool allTargetFramesContinuous =
true;
1766 if (targetFramesPaddingElements !=
nullptr)
1768 for (
unsigned int n = 0u; n < tChannels; ++n)
1770 if (targetFramesPaddingElements[n] != 0u)
1772 allTargetFramesContinuous =
false;
// running pointers into the source frame and the three target planes
1778 const uint8_t* source = sourceFrame;
1779 uint8_t* target0 = targetFrames[0];
1780 uint8_t* target1 = targetFrames[1];
1781 uint8_t* target2 = targetFrames[2];
// number of pixels handled by one vld3q_u8 / vst1q_u8 round
1783 constexpr unsigned int tBlockSize = 16u;
1785 uint8x16x3_t source_8x16x3;
// no padding anywhere: the whole frame is handled as one run of width * height pixels
1787 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1789 const unsigned int pixels = width * height;
1790 const unsigned int blocks = pixels / tBlockSize;
1791 const unsigned int remaining = pixels % tBlockSize;
1793 for (
unsigned int n = 0u; n < blocks; ++n)
// val[c] holds the channel c values of the 16 loaded pixels
1795 source_8x16x3 = vld3q_u8(source);
1797 vst1q_u8(target0, source_8x16x3.val[0]);
1798 vst1q_u8(target1, source_8x16x3.val[1]);
1799 vst1q_u8(target2, source_8x16x3.val[2]);
1801 source += tBlockSize * tChannels;
1803 target0 += tBlockSize;
1804 target1 += tBlockSize;
1805 target2 += tBlockSize;
// pixels that do not fill a complete block are copied individually
1808 for (
unsigned int n = 0u; n < remaining; ++n)
1810 target0[n] = source[n * tChannels + 0u];
1811 target1[n] = source[n * tChannels + 1u];
1812 target2[n] = source[n * tChannels + 2u];
// NOTE(review): presumably the alternative branch of the condition above (at least one frame has padding) - confirm;
// the frame is handled row by row so that the padding at the end of each row can be skipped
1817 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
1818 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
1819 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[2];
1821 const unsigned int blocks = width / tBlockSize;
1822 const unsigned int remaining = width % tBlockSize;
1824 for (
unsigned int y = 0u; y < height; ++y)
1826 for (
unsigned int n = 0u; n < blocks; ++n)
1828 source_8x16x3 = vld3q_u8(source);
1830 vst1q_u8(target0, source_8x16x3.val[0]);
1831 vst1q_u8(target1, source_8x16x3.val[1]);
1832 vst1q_u8(target2, source_8x16x3.val[2]);
1834 source += tBlockSize * tChannels;
1836 target0 += tBlockSize;
1837 target1 += tBlockSize;
1838 target2 += tBlockSize;
1841 for (
unsigned int n = 0u; n < remaining; ++n)
1843 target0[n] = source[n * tChannels + 0u];
1844 target1[n] = source[n * tChannels + 1u];
1845 target2[n] = source[n * tChannels + 2u];
// the pointers were advanced by complete blocks only; skip the individually copied pixels plus the row padding
1848 source += remaining * tChannels + sourceFramePaddingElements;
1849 target0 += remaining + targetFrame0PaddingElements;
1850 target1 += remaining + targetFrame1PaddingElements;
1851 target2 += remaining + targetFrame2PaddingElements;
1857inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 4u>(
const uint8_t*
const sourceFrame, uint8_t*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
1859 ocean_assert(sourceFrame !=
nullptr);
1860 ocean_assert(targetFrames !=
nullptr);
1862 ocean_assert(width != 0u && height != 0u);
1863 ocean_assert(channels == 4u);
1865 constexpr unsigned int tChannels = 4u;
1867 bool allTargetFramesContinuous =
true;
1869 if (targetFramesPaddingElements !=
nullptr)
1871 for (
unsigned int n = 0u; n < tChannels; ++n)
1873 if (targetFramesPaddingElements[n] != 0u)
1875 allTargetFramesContinuous =
false;
1881 const uint8_t* source = sourceFrame;
1882 uint8_t* target0 = targetFrames[0];
1883 uint8_t* target1 = targetFrames[1];
1884 uint8_t* target2 = targetFrames[2];
1885 uint8_t* target3 = targetFrames[3];
1887 constexpr unsigned int tBlockSize = 16u;
1889 uint8x16x4_t source_8x16x4;
1891 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1893 const unsigned int pixels = width * height;
1894 const unsigned int blocks = pixels / tBlockSize;
1895 const unsigned int remaining = pixels % tBlockSize;
1897 for (
unsigned int n = 0u; n < blocks; ++n)
1899 source_8x16x4 = vld4q_u8(source);
1901 vst1q_u8(target0, source_8x16x4.val[0]);
1902 vst1q_u8(target1, source_8x16x4.val[1]);
1903 vst1q_u8(target2, source_8x16x4.val[2]);
1904 vst1q_u8(target3, source_8x16x4.val[3]);
1906 source += tBlockSize * tChannels;
1908 target0 += tBlockSize;
1909 target1 += tBlockSize;
1910 target2 += tBlockSize;
1911 target3 += tBlockSize;
1914 for (
unsigned int n = 0u; n < remaining; ++n)
1916 target0[n] = source[n * tChannels + 0u];
1917 target1[n] = source[n * tChannels + 1u];
1918 target2[n] = source[n * tChannels + 2u];
1919 target3[n] = source[n * tChannels + 3u];
1924 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
1925 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
1926 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[2];
1927 const unsigned int targetFrame3PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[3];
1929 const unsigned int blocks = width / tBlockSize;
1930 const unsigned int remaining = width % tBlockSize;
1932 for (
unsigned int y = 0u; y < height; ++y)
1934 for (
unsigned int n = 0u; n < blocks; ++n)
1936 source_8x16x4 = vld4q_u8(source);
1938 vst1q_u8(target0, source_8x16x4.val[0]);
1939 vst1q_u8(target1, source_8x16x4.val[1]);
1940 vst1q_u8(target2, source_8x16x4.val[2]);
1941 vst1q_u8(target3, source_8x16x4.val[3]);
1943 source += tBlockSize * tChannels;
1945 target0 += tBlockSize;
1946 target1 += tBlockSize;
1947 target2 += tBlockSize;
1948 target3 += tBlockSize;
1951 for (
unsigned int n = 0u; n < remaining; ++n)
1953 target0[n] = source[n * tChannels + 0u];
1954 target1[n] = source[n * tChannels + 1u];
1955 target2[n] = source[n * tChannels + 2u];
1956 target3[n] = source[n * tChannels + 3u];
1959 source += remaining * tChannels + sourceFramePaddingElements;
1960 target0 += remaining + targetFrame0PaddingElements;
1961 target1 += remaining + targetFrame1PaddingElements;
1962 target2 += remaining + targetFrame2PaddingElements;
1963 target3 += remaining + targetFrame3PaddingElements;
// Generic (non-SIMD) implementation splitting an interleaved frame into one plane per channel.
// Every element is converted from TSource to TTarget with an explicit TTarget(...) conversion.
// Three memory layouts are distinguished below, depending on which buffers have row padding.
1970template <
typename TSource,
typename TTarget,
unsigned int tChannels>
1971void FrameChannels::separateTo1Channel(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
1973 ocean_assert(sourceFrame !=
nullptr);
1974 ocean_assert(targetFrames !=
nullptr);
1976 ocean_assert(width != 0u && height != 0u);
// forwards to the runtime variant which receives the channel number as function argument
// NOTE(review): presumably only taken when tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME - confirm
1982 separateTo1ChannelRuntime<TSource, TTarget>(sourceFrame, targetFrames, width, height, channels, sourceFramePaddingElements, targetFramesPaddingElements);
// every target plane must be a valid pointer
1987 for (
unsigned int c = 0u; c < tChannels; ++c)
1989 ocean_assert(targetFrames[c] !=
nullptr);
// layout 1: no source padding and no target padding array, a pixel is addressed by one linear index n
1993 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements ==
nullptr)
1995 for (
unsigned int n = 0u; n < width * height; ++n)
1997 for (
unsigned int c = 0u; c < tChannels; ++c)
1999 targetFrames[c][n] = TTarget(sourceFrame[n * tChannels + c]);
// layout 2: only the source frame has padding, the rows of the target planes hold exactly 'width' elements
2003 else if (targetFramesPaddingElements ==
nullptr)
2005 ocean_assert(sourceFramePaddingElements != 0u);
2007 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
2009 for (
unsigned int y = 0u; y < height; ++y)
2011 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
2013 const unsigned int targetRowOffset = y * width;
2015 for (
unsigned int x = 0u; x < width; ++x)
2017 for (
unsigned int c = 0u; c < tChannels; ++c)
2019 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * tChannels + c));
// layout 3: a padding array for the target planes exists, each plane gets its own row stride
2026 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
2028 Indices32 targetFrameStrideElements(tChannels);
// stride of a target plane = visible pixels + individual padding of that plane
2030 for (
unsigned int c = 0u; c < tChannels; ++c)
2032 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
2035 for (
unsigned int y = 0u; y < height; ++y)
2037 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
2039 for (
unsigned int x = 0u; x < width; ++x)
2041 for (
unsigned int c = 0u; c < tChannels; ++c)
2043 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * tChannels + c));
2050template <
typename TSource,
typename TTarget>
2051void FrameChannels::separateTo1Channel(
const TSource*
const sourceFrame,
const std::initializer_list<TTarget*>& targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int sourceFramePaddingElements,
const std::initializer_list<const unsigned int>& targetFramesPaddingElements)
2053 ocean_assert(targetFrames.size() >= 1);
2054 ocean_assert(targetFramesPaddingElements.size() == 0 || targetFrames.size() == targetFramesPaddingElements.size());
2056 if (targetFrames.size() == 2)
2058 separateTo1Channel<TSource, TTarget, 2u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
2060 else if (targetFrames.size() == 3)
2062 separateTo1Channel<TSource, TTarget, 3u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
2064 else if (targetFrames.size() == 4)
2066 separateTo1Channel<TSource, TTarget, 4u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
2070 separateTo1Channel<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
2074#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
2077inline void FrameChannels::zipChannels<uint8_t, uint8_t, 2u>(
const uint8_t*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2079 ocean_assert(sourceFrames !=
nullptr);
2080 ocean_assert(targetFrame !=
nullptr);
2082 ocean_assert(width != 0u && height != 0u);
2083 ocean_assert(channels == 2u);
2085 constexpr unsigned int tChannels = 2u;
2087 bool allSourceFramesContinuous =
true;
2089 if (sourceFramesPaddingElements !=
nullptr)
2091 for (
unsigned int n = 0u; n < tChannels; ++n)
2093 if (sourceFramesPaddingElements[n] != 0u)
2095 allSourceFramesContinuous =
false;
2101 const uint8_t* source0 = sourceFrames[0];
2102 const uint8_t* source1 = sourceFrames[1];
2103 uint8_t* target = targetFrame;
2105 constexpr unsigned int tBlockSize = 16u;
2107 uint8x16x2_t source_8x16x2;
2109 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2111 const unsigned int pixels = width * height;
2112 const unsigned int blocks = pixels / tBlockSize;
2113 const unsigned int remaining = pixels % tBlockSize;
2115 for (
unsigned int n = 0u; n < blocks; ++n)
2117 source_8x16x2.val[0] = vld1q_u8(source0);
2118 source_8x16x2.val[1] = vld1q_u8(source1);
2120 vst2q_u8(target, source_8x16x2);
2122 source0 += tBlockSize;
2123 source1 += tBlockSize;
2125 target += tBlockSize * tChannels;
2128 for (
unsigned int n = 0u; n < remaining; ++n)
2130 target[n * tChannels + 0u] = source0[n];
2131 target[n * tChannels + 1u] = source1[n];
2136 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2137 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2139 const unsigned int blocks = width / tBlockSize;
2140 const unsigned int remaining = width % tBlockSize;
2142 for (
unsigned int y = 0u; y < height; ++y)
2144 for (
unsigned int n = 0u; n < blocks; ++n)
2146 source_8x16x2.val[0] = vld1q_u8(source0);
2147 source_8x16x2.val[1] = vld1q_u8(source1);
2149 vst2q_u8(target, source_8x16x2);
2151 source0 += tBlockSize;
2152 source1 += tBlockSize;
2154 target += tBlockSize * tChannels;
2157 for (
unsigned int n = 0u; n < remaining; ++n)
2159 target[n * tChannels + 0u] = source0[n];
2160 target[n * tChannels + 1u] = source1[n];
2163 source0 += remaining + sourceFrame0PaddingElements;
2164 source1 += remaining + sourceFrame1PaddingElements;
2165 target += remaining * tChannels + targetFramePaddingElements;
2171inline void FrameChannels::zipChannels<uint8_t, uint8_t, 3u>(
const uint8_t*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2173 ocean_assert(sourceFrames !=
nullptr);
2174 ocean_assert(targetFrame !=
nullptr);
2176 ocean_assert(width != 0u && height != 0u);
2177 ocean_assert(channels == 3u);
2179 constexpr unsigned int tChannels = 3u;
2181 bool allSourceFramesContinuous =
true;
2183 if (sourceFramesPaddingElements !=
nullptr)
2185 for (
unsigned int n = 0u; n < tChannels; ++n)
2187 if (sourceFramesPaddingElements[n] != 0u)
2189 allSourceFramesContinuous =
false;
2195 const uint8_t* source0 = sourceFrames[0];
2196 const uint8_t* source1 = sourceFrames[1];
2197 const uint8_t* source2 = sourceFrames[2];
2198 uint8_t* target = targetFrame;
2200 constexpr unsigned int tBlockSize = 16u;
2202 uint8x16x3_t source_8x16x3;
2204 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2206 const unsigned int pixels = width * height;
2207 const unsigned int blocks = pixels / tBlockSize;
2208 const unsigned int remaining = pixels % tBlockSize;
2210 for (
unsigned int n = 0u; n < blocks; ++n)
2212 source_8x16x3.val[0] = vld1q_u8(source0);
2213 source_8x16x3.val[1] = vld1q_u8(source1);
2214 source_8x16x3.val[2] = vld1q_u8(source2);
2216 vst3q_u8(target, source_8x16x3);
2218 source0 += tBlockSize;
2219 source1 += tBlockSize;
2220 source2 += tBlockSize;
2222 target += tBlockSize * tChannels;
2225 for (
unsigned int n = 0u; n < remaining; ++n)
2227 target[n * tChannels + 0u] = source0[n];
2228 target[n * tChannels + 1u] = source1[n];
2229 target[n * tChannels + 2u] = source2[n];
2234 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2235 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2236 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
2238 const unsigned int blocks = width / tBlockSize;
2239 const unsigned int remaining = width % tBlockSize;
2241 for (
unsigned int y = 0u; y < height; ++y)
2243 for (
unsigned int n = 0u; n < blocks; ++n)
2245 source_8x16x3.val[0] = vld1q_u8(source0);
2246 source_8x16x3.val[1] = vld1q_u8(source1);
2247 source_8x16x3.val[2] = vld1q_u8(source2);
2249 vst3q_u8(target, source_8x16x3);
2251 source0 += tBlockSize;
2252 source1 += tBlockSize;
2253 source2 += tBlockSize;
2255 target += tBlockSize * tChannels;
2258 for (
unsigned int n = 0u; n < remaining; ++n)
2260 target[n * tChannels + 0u] = source0[n];
2261 target[n * tChannels + 1u] = source1[n];
2262 target[n * tChannels + 2u] = source2[n];
2265 source0 += remaining + sourceFrame0PaddingElements;
2266 source1 += remaining + sourceFrame1PaddingElements;
2267 source2 += remaining + sourceFrame2PaddingElements;
2268 target += remaining * tChannels + targetFramePaddingElements;
2274inline void FrameChannels::zipChannels<uint8_t, uint8_t, 4u>(
const uint8_t*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2276 ocean_assert(sourceFrames !=
nullptr);
2277 ocean_assert(targetFrame !=
nullptr);
2279 ocean_assert(width != 0u && height != 0u);
2280 ocean_assert(channels == 4u);
2282 constexpr unsigned int tChannels = 4u;
2284 bool allSourceFramesContinuous =
true;
2286 if (sourceFramesPaddingElements !=
nullptr)
2288 for (
unsigned int n = 0u; n < tChannels; ++n)
2290 if (sourceFramesPaddingElements[n] != 0u)
2292 allSourceFramesContinuous =
false;
2298 const uint8_t* source0 = sourceFrames[0];
2299 const uint8_t* source1 = sourceFrames[1];
2300 const uint8_t* source2 = sourceFrames[2];
2301 const uint8_t* source3 = sourceFrames[3];
2302 uint8_t* target = targetFrame;
2304 constexpr unsigned int tBlockSize = 16u;
2306 uint8x16x4_t source_8x16x4;
2308 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2310 const unsigned int pixels = width * height;
2311 const unsigned int blocks = pixels / tBlockSize;
2312 const unsigned int remaining = pixels % tBlockSize;
2314 for (
unsigned int n = 0u; n < blocks; ++n)
2316 source_8x16x4.val[0] = vld1q_u8(source0);
2317 source_8x16x4.val[1] = vld1q_u8(source1);
2318 source_8x16x4.val[2] = vld1q_u8(source2);
2319 source_8x16x4.val[3] = vld1q_u8(source3);
2321 vst4q_u8(target, source_8x16x4);
2323 source0 += tBlockSize;
2324 source1 += tBlockSize;
2325 source2 += tBlockSize;
2326 source3 += tBlockSize;
2328 target += tBlockSize * tChannels;
2331 for (
unsigned int n = 0u; n < remaining; ++n)
2333 target[n * tChannels + 0u] = source0[n];
2334 target[n * tChannels + 1u] = source1[n];
2335 target[n * tChannels + 2u] = source2[n];
2336 target[n * tChannels + 3u] = source3[n];
2341 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2342 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2343 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
2344 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[3];
2346 const unsigned int blocks = width / tBlockSize;
2347 const unsigned int remaining = width % tBlockSize;
2349 for (
unsigned int y = 0u; y < height; ++y)
2351 for (
unsigned int n = 0u; n < blocks; ++n)
2353 source_8x16x4.val[0] = vld1q_u8(source0);
2354 source_8x16x4.val[1] = vld1q_u8(source1);
2355 source_8x16x4.val[2] = vld1q_u8(source2);
2356 source_8x16x4.val[3] = vld1q_u8(source3);
2358 vst4q_u8(target, source_8x16x4);
2360 source0 += tBlockSize;
2361 source1 += tBlockSize;
2362 source2 += tBlockSize;
2363 source3 += tBlockSize;
2365 target += tBlockSize * tChannels;
2368 for (
unsigned int n = 0u; n < remaining; ++n)
2370 target[n * tChannels + 0u] = source0[n];
2371 target[n * tChannels + 1u] = source1[n];
2372 target[n * tChannels + 2u] = source2[n];
2373 target[n * tChannels + 3u] = source3[n];
2376 source0 += remaining + sourceFrame0PaddingElements;
2377 source1 += remaining + sourceFrame1PaddingElements;
2378 source2 += remaining + sourceFrame2PaddingElements;
2379 source3 += remaining + sourceFrame3PaddingElements;
2380 target += remaining * tChannels + targetFramePaddingElements;
// NEON specialization: interleaves two float planes into one 2-channel uint8_t frame.
// Blocks of 16 pixels are written with vst2q_u8, the remaining pixels of a row are cast one by one.
2386inline void FrameChannels::zipChannels<float, uint8_t, 2u>(
const float*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2388 ocean_assert(sourceFrames !=
nullptr);
2389 ocean_assert(targetFrame !=
nullptr);
2391 ocean_assert(width != 0u && height != 0u);
2392 ocean_assert(channels == 2u);
2394 constexpr unsigned int tChannels = 2u;
// the source planes are continuous if no padding array is given or if all paddings are zero
2396 bool allSourceFramesContinuous =
true;
2398 if (sourceFramesPaddingElements !=
nullptr)
2400 for (
unsigned int n = 0u; n < tChannels; ++n)
2402 if (sourceFramesPaddingElements[n] != 0u)
2404 allSourceFramesContinuous =
false;
2410 const float* source0 = sourceFrames[0];
2411 const float* source1 = sourceFrames[1];
2412 uint8_t* target = targetFrame;
// number of pixels handled with one interleaved store
2414 constexpr unsigned int tBlockSize = 16u;
2416 uint8x16x2_t target_8x16x2;
// fast path: no padding at all, the frame is processed as one run of width * height pixels
2418 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2420 const unsigned int pixels = width * height;
2421 const unsigned int blocks = pixels / tBlockSize;
2422 const unsigned int remaining = pixels % tBlockSize;
2424 for (
unsigned int n = 0u; n < blocks; ++n)
// writes the two 16-byte registers interleaved (c0 c1 c0 c1 ...), 32 bytes in total
2429 vst2q_u8(target, target_8x16x2);
2431 source0 += tBlockSize;
2432 source1 += tBlockSize;
2434 target += tBlockSize * tChannels;
// tail pixels: values are expected in [0, 256), the uint8_t cast drops the fractional part
2437 for (
unsigned int n = 0u; n < remaining; ++n)
2439 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2440 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2442 target[n * tChannels + 0u] = uint8_t(source0[n]);
2443 target[n * tChannels + 1u] = uint8_t(source1[n]);
// row-wise path: at least one buffer has padding, a missing padding array counts as zero padding
2448 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2449 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2451 const unsigned int blocks = width / tBlockSize;
2452 const unsigned int remaining = width % tBlockSize;
2454 for (
unsigned int y = 0u; y < height; ++y)
2456 for (
unsigned int n = 0u; n < blocks; ++n)
2461 vst2q_u8(target, target_8x16x2);
2463 source0 += tBlockSize;
2464 source1 += tBlockSize;
2466 target += tBlockSize * tChannels;
2469 for (
unsigned int n = 0u; n < remaining; ++n)
2471 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2472 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2474 target[n * tChannels + 0u] = uint8_t(source0[n]);
2475 target[n * tChannels + 1u] = uint8_t(source1[n]);
// moving all pointers behind the tail pixels and the padding of the current row
2478 source0 += remaining + sourceFrame0PaddingElements;
2479 source1 += remaining + sourceFrame1PaddingElements;
2480 target += remaining * tChannels + targetFramePaddingElements;
// NEON specialization: interleaves three float planes into one 3-channel uint8_t frame.
// Blocks of 16 pixels are written with vst3q_u8, the remaining pixels of a row are cast one by one.
2486inline void FrameChannels::zipChannels<float, uint8_t, 3u>(
const float*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2488 ocean_assert(sourceFrames !=
nullptr);
2489 ocean_assert(targetFrame !=
nullptr);
2491 ocean_assert(width != 0u && height != 0u);
2492 ocean_assert(channels == 3u);
2494 constexpr unsigned int tChannels = 3u;
// the source planes are continuous if no padding array is given or if all paddings are zero
2496 bool allSourceFramesContinuous =
true;
2498 if (sourceFramesPaddingElements !=
nullptr)
2500 for (
unsigned int n = 0u; n < tChannels; ++n)
2502 if (sourceFramesPaddingElements[n] != 0u)
2504 allSourceFramesContinuous =
false;
2510 const float* source0 = sourceFrames[0];
2511 const float* source1 = sourceFrames[1];
2512 const float* source2 = sourceFrames[2];
2513 uint8_t* target = targetFrame;
// number of pixels handled with one interleaved store
2515 constexpr unsigned int tBlockSize = 16u;
2517 uint8x16x3_t target_8x16x3;
// fast path: no padding at all, the frame is processed as one run of width * height pixels
2519 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2521 const unsigned int pixels = width * height;
2522 const unsigned int blocks = pixels / tBlockSize;
2523 const unsigned int remaining = pixels % tBlockSize;
2525 for (
unsigned int n = 0u; n < blocks; ++n)
// writes the three 16-byte registers interleaved (c0 c1 c2 c0 c1 c2 ...), 48 bytes in total
2531 vst3q_u8(target, target_8x16x3);
2533 source0 += tBlockSize;
2534 source1 += tBlockSize;
2535 source2 += tBlockSize;
2537 target += tBlockSize * tChannels;
// tail pixels: values are expected in [0, 256), the uint8_t cast drops the fractional part
2540 for (
unsigned int n = 0u; n < remaining; ++n)
2542 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2543 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2544 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2546 target[n * tChannels + 0u] = uint8_t(source0[n]);
2547 target[n * tChannels + 1u] = uint8_t(source1[n]);
2548 target[n * tChannels + 2u] = uint8_t(source2[n]);
// row-wise path: at least one buffer has padding, a missing padding array counts as zero padding
2553 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2554 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2555 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
2557 const unsigned int blocks = width / tBlockSize;
2558 const unsigned int remaining = width % tBlockSize;
2560 for (
unsigned int y = 0u; y < height; ++y)
2562 for (
unsigned int n = 0u; n < blocks; ++n)
2569 vst3q_u8(target, target_8x16x3);
2571 source0 += tBlockSize;
2572 source1 += tBlockSize;
2573 source2 += tBlockSize;
2575 target += tBlockSize * tChannels;
2578 for (
unsigned int n = 0u; n < remaining; ++n)
2580 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2581 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2582 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2584 target[n * tChannels + 0u] = uint8_t(source0[n]);
2585 target[n * tChannels + 1u] = uint8_t(source1[n]);
2586 target[n * tChannels + 2u] = uint8_t(source2[n]);
// moving all pointers behind the tail pixels and the padding of the current row
2589 source0 += remaining + sourceFrame0PaddingElements;
2590 source1 += remaining + sourceFrame1PaddingElements;
2591 source2 += remaining + sourceFrame2PaddingElements;
2592 target += remaining * tChannels + targetFramePaddingElements;
// NEON specialization: interleaves four float planes into one 4-channel uint8_t frame.
// Blocks of 16 pixels are written with vst4q_u8, the remaining pixels of a row are cast one by one.
2598inline void FrameChannels::zipChannels<float, uint8_t, 4u>(
const float*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2600 ocean_assert(sourceFrames !=
nullptr);
2601 ocean_assert(targetFrame !=
nullptr);
2603 ocean_assert(width != 0u && height != 0u);
2604 ocean_assert(channels == 4u);
2606 constexpr unsigned int tChannels = 4u;
// the source planes are continuous if no padding array is given or if all paddings are zero
2608 bool allSourceFramesContinuous =
true;
2610 if (sourceFramesPaddingElements !=
nullptr)
2612 for (
unsigned int n = 0u; n < tChannels; ++n)
2614 if (sourceFramesPaddingElements[n] != 0u)
2616 allSourceFramesContinuous =
false;
2622 const float* source0 = sourceFrames[0];
2623 const float* source1 = sourceFrames[1];
2624 const float* source2 = sourceFrames[2];
2625 const float* source3 = sourceFrames[3];
2626 uint8_t* target = targetFrame;
// number of pixels handled with one interleaved store
2628 constexpr unsigned int tBlockSize = 16u;
2630 uint8x16x4_t target_8x16x4;
// fast path: no padding at all, the frame is processed as one run of width * height pixels
2632 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2634 const unsigned int pixels = width * height;
2635 const unsigned int blocks = pixels / tBlockSize;
2636 const unsigned int remaining = pixels % tBlockSize;
2638 for (
unsigned int n = 0u; n < blocks; ++n)
// writes the four 16-byte registers interleaved (c0 c1 c2 c3 c0 ...), 64 bytes in total
2645 vst4q_u8(target, target_8x16x4);
2647 source0 += tBlockSize;
2648 source1 += tBlockSize;
2649 source2 += tBlockSize;
2650 source3 += tBlockSize;
2652 target += tBlockSize * tChannels;
// tail pixels: values are expected in [0, 256), the uint8_t cast drops the fractional part
2655 for (
unsigned int n = 0u; n < remaining; ++n)
2657 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2658 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2659 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2660 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2662 target[n * tChannels + 0u] = uint8_t(source0[n]);
2663 target[n * tChannels + 1u] = uint8_t(source1[n]);
2664 target[n * tChannels + 2u] = uint8_t(source2[n]);
2665 target[n * tChannels + 3u] = uint8_t(source3[n]);
// row-wise path: at least one buffer has padding, a missing padding array counts as zero padding
2670 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2671 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2672 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
2673 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[3];
2675 const unsigned int blocks = width / tBlockSize;
2676 const unsigned int remaining = width % tBlockSize;
2678 for (
unsigned int y = 0u; y < height; ++y)
2680 for (
unsigned int n = 0u; n < blocks; ++n)
2687 vst4q_u8(target, target_8x16x4);
2689 source0 += tBlockSize;
2690 source1 += tBlockSize;
2691 source2 += tBlockSize;
2692 source3 += tBlockSize;
2694 target += tBlockSize * tChannels;
2697 for (
unsigned int n = 0u; n < remaining; ++n)
2699 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2700 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2701 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2702 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2704 target[n * tChannels + 0u] = uint8_t(source0[n]);
2705 target[n * tChannels + 1u] = uint8_t(source1[n]);
2706 target[n * tChannels + 2u] = uint8_t(source2[n]);
2707 target[n * tChannels + 3u] = uint8_t(source3[n]);
// moving all pointers behind the tail pixels and the padding of the current row
2710 source0 += remaining + sourceFrame0PaddingElements;
2711 source1 += remaining + sourceFrame1PaddingElements;
2712 source2 += remaining + sourceFrame2PaddingElements;
2713 source3 += remaining + sourceFrame3PaddingElements;
2714 target += remaining * tChannels + targetFramePaddingElements;
// Generic (non-SIMD) implementation interleaving one plane per channel into a single frame.
// Every element is converted from TSource to TTarget with an explicit TTarget(...) conversion.
2721template <
typename TSource,
typename TTarget,
unsigned int tChannels>
2722void FrameChannels::zipChannels(
const TSource*
const* sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2724 ocean_assert(sourceFrames !=
nullptr);
2725 ocean_assert(targetFrame !=
nullptr);
2727 ocean_assert(width != 0u && height != 0u);
// forwards to the runtime variant which receives the channel number as function argument
// NOTE(review): presumably only taken when tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME - confirm
2733 zipChannelsRuntime<TSource, TTarget>(sourceFrames, targetFrame, width, height, channels, sourceFramesPaddingElements, targetFramePaddingElements);
// the source planes are continuous if no padding array is given or if all paddings are zero
2737 bool allSourceFramesContinuous =
true;
2739 if (sourceFramesPaddingElements !=
nullptr)
2741 for (
unsigned int n = 0u; n < tChannels; ++n)
2743 if (sourceFramesPaddingElements[n] != 0u)
2745 allSourceFramesContinuous =
false;
// no padding at all: a pixel is addressed by one linear index n in every buffer
2751 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2753 for (
unsigned int n = 0u; n < width * height; ++n)
2755 for (
unsigned int c = 0u; c < tChannels; ++c)
2757 targetFrame[n * tChannels + c] = TTarget(sourceFrames[c][n]);
// at least one buffer has padding: rows are addressed via individual strides
2763 const unsigned int targetFrameStrideElements = width * tChannels + targetFramePaddingElements;
2765 Indices32 sourceFrameStrideElements(tChannels);
// stride of a source plane = visible pixels + individual padding (zero if no padding array is given)
2767 for (
unsigned int c = 0u; c < tChannels; ++c)
2769 if (sourceFramesPaddingElements ==
nullptr)
2771 sourceFrameStrideElements[c] = width;
2775 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
2779 for (
unsigned int y = 0u; y < height; ++y)
2781 TTarget*
const targetRow = targetFrame + y * targetFrameStrideElements;
2783 for (
unsigned int x = 0u; x < width; ++x)
2785 for (
unsigned int c = 0u; c < tChannels; ++c)
2787 *(targetRow + x * tChannels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
2794template <
typename TSource,
typename TTarget>
2795void FrameChannels::zipChannels(
const std::initializer_list<const TSource*>& sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const std::initializer_list<unsigned int>& sourceFramePaddingElements,
const unsigned int targetFramePaddingElements)
2797 ocean_assert(sourceFrames.size() >= 1);
2798 ocean_assert(sourceFramePaddingElements.size() == 0 || sourceFrames.size() == sourceFramePaddingElements.size());
2800 if (sourceFrames.size() == 2)
2802 zipChannels<TSource, TTarget, 2u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2804 else if (sourceFrames.size() == 3)
2806 zipChannels<TSource, TTarget, 3u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2808 else if (sourceFrames.size() == 4)
2810 zipChannels<TSource, TTarget, 4u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2814 zipChannels<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2818template <
typename T,
unsigned int tSourceChannels>
2819inline void FrameChannels::addFirstChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2821 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
2823 ocean_assert(source !=
nullptr && sourceNewChannel !=
nullptr && target !=
nullptr);
2824 ocean_assert(source != target);
2825 ocean_assert(width >= 1u && height >= 1u);
2827 const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
2829 const void* sources[2] = {source, sourceNewChannel};
2831 FrameConverter::convertArbitraryPixelFormat(sources, (
void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, true>, options, worker);
// Adds one new channel holding a constant value to every pixel of a frame, the result has tSourceChannels + 1 channels.
// NOTE(review): the signature (original lines 2835-2836) is not visible in this excerpt. The body reads
// source, target, width, height, newChannelValue, conversionFlag, sourcePaddingElements,
// targetPaddingElements and worker; presumably this is FrameChannels::addFirstChannelValue() with the
// same parameter list as addLastChannelValue() - confirm.
2834template <
typename T,
unsigned int tSourceChannels>
// At least one source channel is required.
2837 static_assert(tSourceChannels >= 1u,
"Invalid channel number!");
2839 ocean_assert(source !=
nullptr && target !=
nullptr);
2840 ocean_assert(width >= 1u && height >= 1u);
// The target holds exactly one channel more than the source.
2842 const unsigned int targetChannels = tSourceChannels + 1u;
// Row strides in elements: payload (width * channels) plus the optional padding at the end of each row.
2844 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2845 const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
// The value of the new channel is handed to the row function as an untyped pointer.
2847 const void* channelValueParameter = (
const void*)(&newChannelValue);
// Both frames are continuous in memory only if neither of them has padding elements.
2849 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// Row function: addChannelValueRow<T, tSourceChannels, true> (addLastChannelValue() uses the 'false' variant);
// in-place pixel reversal for the target rows: reverseRowPixelOrderInPlace<T, targetChannels>.
2851 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, true>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
2854template <
typename T,
unsigned int tSourceChannels>
2855inline void FrameChannels::addLastChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2857 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
2859 ocean_assert(source !=
nullptr && sourceNewChannel !=
nullptr && target !=
nullptr);
2860 ocean_assert(source != target);
2861 ocean_assert(width >= 1u && height >= 1u);
2863 const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
2865 const void* sources[2] = {source, sourceNewChannel};
2867 FrameConverter::convertArbitraryPixelFormat(sources, (
void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, false>, options, worker);
2870template <
typename T,
unsigned int tSourceChannels>
2871inline void FrameChannels::addLastChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2873 static_assert(tSourceChannels >= 1u,
"Invalid channel number!");
2875 ocean_assert(source !=
nullptr && target !=
nullptr);
2876 ocean_assert(width >= 1u && height >= 1u);
2878 const unsigned int targetChannels = tSourceChannels + 1u;
2880 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2881 const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
2883 const void* channelValueParameter = (
const void*)(&newChannelValue);
2885 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2887 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, false>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
// Drops the first channel of every pixel: channels 1 .. tSourceChannels-1 are shuffled into a target with one channel less.
// NOTE(review): the signature (original lines 2891-2892) is not visible in this excerpt. The body reads
// source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements and worker;
// presumably this is FrameChannels::removeFirstChannel() - confirm.
2890template <
typename T,
unsigned int tSourceChannels>
// Between 2 and 8 source channels are accepted (the 32 bit shuffle pattern holds eight 4 bit entries).
2893 static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u,
"Invalid channel number!");
2895 ocean_assert(source !=
nullptr && target !=
nullptr);
2896 ocean_assert(width >= 1u && height >= 1u);
// Pattern for the maximal case: the n-th nibble (counted from the lowest) holds n + 1,
// i.e., target channel n is read from source channel n + 1.
2898 const unsigned int shufflePatternMax = 0x07654321u;
// The shift by (9 - tSourceChannels) nibbles keeps the lowest (tSourceChannels - 1) nibbles, one per target channel.
2899 const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u);
// Nibbles of unused target channels become 0.
2901 const unsigned int shufflePattern = shufflePatternMax & mask;
2903 FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
// Drops the last channel of every pixel: channels 0 .. tSourceChannels-2 are shuffled into a target with one channel less.
// NOTE(review): the signature (original lines 2907-2908) is not visible in this excerpt. The body reads
// source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements and worker;
// presumably this is FrameChannels::removeLastChannel() - confirm.
2906template <
typename T,
unsigned int tSourceChannels>
// Between 2 and 8 source channels are accepted (the 32 bit shuffle pattern holds eight 4 bit entries).
2909 static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u,
"Invalid channel number!");
2911 ocean_assert(source !=
nullptr && target !=
nullptr);
2912 ocean_assert(width >= 1u && height >= 1u);
// Identity pattern: the n-th nibble (counted from the lowest) holds n, i.e., target channel n is read from source channel n.
2914 const unsigned int shufflePatternMax = 0x76543210u;
// The shift by (9 - tSourceChannels) nibbles keeps the lowest (tSourceChannels - 1) nibbles, one per target channel.
2915 const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u);
// Nibbles of unused target channels become 0.
2917 const unsigned int shufflePattern = shufflePatternMax & mask;
2919 FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
// Runs copyChannelRow<T, tSourceChannels, tTargetChannels, tSourceChannelIndex, tTargetChannelIndex> over all rows of a frame.
// Presumably this copies channel tSourceChannelIndex of each source pixel into channel tTargetChannelIndex
// of the corresponding target pixel - confirm against copyChannelRow(), which is not part of this excerpt.
// source/target: the frames, both must be valid; width/height: frame size in pixel, both at least 1;
// sourcePaddingElements/targetPaddingElements: padding elements at the end of each row; worker: forwarded to the converter.
2922template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
2923inline void FrameChannels::copyChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
// Both frames need at least one channel ...
2925 static_assert(tSourceChannels >= 1u,
"Invalid number of channels!");
2926 static_assert(tTargetChannels >= 1u,
"Invalid number of channels!");
// ... and both channel indices must address an existing channel.
2928 static_assert(tSourceChannelIndex < tSourceChannels,
"Invalid channel index!");
2929 static_assert(tTargetChannelIndex < tTargetChannels,
"Invalid channel index!");
2931 ocean_assert(source !=
nullptr && target !=
nullptr);
2932 ocean_assert(width >= 1u && height >= 1u);
// Row strides in elements: payload (width * channels) plus the optional padding at the end of each row.
2934 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2935 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
// Both frames are continuous in memory only if neither of them has padding elements.
2939 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// The conversion always uses CONVERT_NORMAL.
// NOTE(review): 'reversePixelOrderRowInPlaceFunction' is declared in original lines 2937-2938, which are not visible in this excerpt.
2941 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements,
CONVERT_NORMAL, FrameChannels::copyChannelRow<T, tSourceChannels, tTargetChannels, tSourceChannelIndex, tTargetChannelIndex>, reversePixelOrderRowInPlaceFunction, areContinuous,
nullptr, worker);
// Applies setChannelSubset<T, tChannel, tChannels> to all rows [0, height) of a frame, either distributed
// via the worker or with one direct call.
// Presumably this writes 'value' into channel tChannel of every pixel - confirm against setChannelSubset(),
// which is not part of this excerpt.
// frame: the frame to modify, must be valid; width/height: frame size in pixel, both at least 1;
// value: the value handed to setChannelSubset(); framePaddingElements: padding elements at the end of each row.
2944template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
2945inline void FrameChannels::setChannel(T* frame,
const unsigned int width,
const unsigned int height,
const T value,
const unsigned int framePaddingElements,
Worker* worker)
// The channel index must address one of the tChannels channels.
2947 static_assert(tChannels >= 1u,
"Invalid channel number!");
2948 static_assert(tChannel < tChannels,
"Invalid channel index!");
2950 ocean_assert(frame !=
nullptr);
2951 ocean_assert(width >= 1u && height >= 1u);
// NOTE(review): the condition selecting between the two calls below (original lines 2952-2954 and 2956-2958) is not visible in this excerpt.
// Worker path: the two trailing '0u' arguments of createStatic() are placeholders for the row range,
// which executeFunction() receives as first row 0u and number of rows 'height'.
2955 worker->
executeFunction(
Worker::Function::createStatic(&setChannelSubset<T, tChannel, tChannels>, frame, width, value, framePaddingElements, 0u, 0u), 0u, height);
// Direct path: one call covering all rows, starting at row 0u with 'height' rows.
2959 setChannelSubset<T, tChannel, tChannels>(frame, width, value, framePaddingElements, 0u, height);
// Converts a frame row by row with reverseRowChannelOrder<T, tChannels>, i.e., the channel order inside each pixel is reversed.
// NOTE(review): the signature (original lines 2964-2965) is not visible in this excerpt. The body reads
// source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements and worker;
// presumably this is FrameChannels::reverseChannelOrder() - confirm.
2963template <
typename T,
unsigned int tChannels>
2966 static_assert(tChannels >= 1u,
"Invalid channel number!");
2968 ocean_assert(source !=
nullptr && target !=
nullptr);
2969 ocean_assert(width >= 1u && height >= 1u);
// Source and target have the same number of channels; strides are payload (width * channels) plus padding.
2971 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
2972 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
// The frames are never reported as continuous to the converter, independent of the actual padding.
2974 constexpr bool areContinuous =
false;
2976 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::reverseRowChannelOrder<T, tChannels>, FrameChannels::reverseRowPixelOrderInPlace<T, tChannels>, areContinuous,
nullptr, worker);
// Copies 'size' pixels from source to target in reversed pixel order; the channel order inside each pixel is kept.
// NOTE(review): the signature (original lines 2980-2981) is not visible in this excerpt. The body reads
// 'source', 'target' and 'size' (number of pixels); presumably this is FrameChannels::reverseRowPixelOrder() - confirm.
2979template <
typename T,
unsigned int tChannels>
2982 static_assert(tChannels >= 1u,
"Invalid channel number!");
2984 ocean_assert(source !=
nullptr && target !=
nullptr);
2985 ocean_assert(size >= 1);
// Bounds of both buffers, referenced only by the range checks inside ocean_assert() below.
2988 const T*
const debugSourceStart = source;
2989 const T*
const debugSourceEnd = debugSourceStart + size * tChannels;
2991 const T*
const debugTargetStart = target;
2992 const T*
const debugTargetEnd = debugTargetStart + size * tChannels;
// The target is filled back to front: start one element behind its last pixel and move downwards.
2996 target += size * tChannels;
2998 const T*
const sourceEnd = source + size * tChannels;
// NEON fast path for element types mapped to uint8_t, 16 pixels per iteration.
3000#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3002 if constexpr (std::is_same<typename TypeMapper<T>::Type, uint8_t>::value)
3004 const size_t blocks16 = size /
size_t(16);
// NOTE(review): the statements selecting one of the four loops below (presumably by tChannels) are not visible in this excerpt.
// Loop handling 16 bytes per iteration (16 pixels with one byte each):
// vrev64q_u8 reverses the bytes inside each 64 bit half, swapping both halves completes the reversal.
3010 for (
size_t n = 0; n < blocks16; ++n)
3012 target -= 16u * tChannels;
3014 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3015 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3017 const uint8x16_t source_u_8x16 = vld1q_u8((
const uint8_t*)(source));
3018 uint8x16_t revSource_u_8x16 = vrev64q_u8(source_u_8x16);
3019 revSource_u_8x16 = vcombine_u8(vget_high_u8(revSource_u_8x16), vget_low_u8(revSource_u_8x16));
3021 vst1q_u8((uint8_t*)(target), revSource_u_8x16);
3023 source += 16u * tChannels;
// Loop handling 32 bytes per iteration as 16 bit units (16 pixels with two bytes each):
// each register is reversed in units of 16 bit, then both registers are stored in swapped order.
3031 for (
size_t n = 0; n < blocks16; ++n)
3033 target -= 16u * tChannels;
3035 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3036 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3038 const uint8x16_t sourceA_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 0);
3039 const uint8x16_t sourceB_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 16);
3041 const uint8x16_t revSourceA_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u8(sourceA_u_8x16)));
3042 const uint8x16_t revSourceB_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u8(sourceB_u_8x16)));
3044 const uint8x16_t targetA_u_8x16 = vcombine_u8(vget_high_u8(revSourceA_u_8x16), vget_low_u8(revSourceA_u_8x16));
3045 const uint8x16_t targetB_u_8x16 = vcombine_u8(vget_high_u8(revSourceB_u_8x16), vget_low_u8(revSourceB_u_8x16));
3047 vst1q_u8((uint8_t*)(target) + 0, targetB_u_8x16);
3048 vst1q_u8((uint8_t*)(target) + 16, targetA_u_8x16);
3050 source += 16u * tChannels;
// Loop handling 48 bytes per iteration (16 pixels with three bytes each):
// vld3q_u8 de-interleaves the three channels, each channel register is reversed, vst3q_u8 interleaves them again.
3058 for (
size_t n = 0; n < blocks16; ++n)
3060 target -= 16u * tChannels;
3062 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3063 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3065 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)(source));
3067 uint8x16x3_t revSource_u_8x16x3;
3068 revSource_u_8x16x3.val[0] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[0])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[0])));
3069 revSource_u_8x16x3.val[1] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[1])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[1])));
3070 revSource_u_8x16x3.val[2] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[2])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[2])));
3072 vst3q_u8((uint8_t*)(target), revSource_u_8x16x3);
3074 source += 16u * tChannels;
// Loop handling 64 bytes per iteration as 32 bit units (16 pixels with four bytes each):
// each register is reversed in units of 32 bit, then the four registers are stored in reversed order.
3082 for (
size_t n = 0; n < blocks16; ++n)
3084 target -= 16u * tChannels;
3086 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3087 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3089 const uint8x16_t sourceA_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 0);
3090 const uint8x16_t sourceB_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 16);
3091 const uint8x16_t sourceC_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 32);
3092 const uint8x16_t sourceD_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 48);
3094 const uint8x16_t revSourceA_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceA_u_8x16)));
3095 const uint8x16_t revSourceB_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceB_u_8x16)));
3096 const uint8x16_t revSourceC_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceC_u_8x16)));
3097 const uint8x16_t revSourceD_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceD_u_8x16)));
3099 const uint8x16_t targetA_u_8x16 = vcombine_u8(vget_high_u8(revSourceA_u_8x16), vget_low_u8(revSourceA_u_8x16));
3100 const uint8x16_t targetB_u_8x16 = vcombine_u8(vget_high_u8(revSourceB_u_8x16), vget_low_u8(revSourceB_u_8x16));
3101 const uint8x16_t targetC_u_8x16 = vcombine_u8(vget_high_u8(revSourceC_u_8x16), vget_low_u8(revSourceC_u_8x16));
3102 const uint8x16_t targetD_u_8x16 = vcombine_u8(vget_high_u8(revSourceD_u_8x16), vget_low_u8(revSourceD_u_8x16));
3104 vst1q_u8((uint8_t*)(target) + 0, targetD_u_8x16);
3105 vst1q_u8((uint8_t*)(target) + 16, targetC_u_8x16);
3106 vst1q_u8((uint8_t*)(target) + 32, targetB_u_8x16);
3107 vst1q_u8((uint8_t*)(target) + 48, targetA_u_8x16);
3109 source += 16u * tChannels;
// Generic tail: handles all pixels not covered by a fast path, one pixel per iteration.
3122 while (source != sourceEnd)
3124 ocean_assert(source < sourceEnd);
3126 for (
unsigned int n = 0u; n < tChannels; ++n)
3128 ocean_assert(source + tChannels - n - 1u >= debugSourceStart);
3129 ocean_assert(source + tChannels - n - 1u < debugSourceEnd);
3131 ocean_assert(target > debugTargetStart && target <= debugTargetEnd);
// The channels are read last to first while the target moves downwards, so the channel order of the pixel is preserved.
3133 *--target = source[tChannels - n - 1u];
3136 source += tChannels;
// Reverses the pixel order of a row of 'size' pixels in place; the channel order inside each pixel is kept.
// NOTE(review): the signature (original lines 3141-3142) and the declarations of 'n' and 'PixelType' are not
// visible in this excerpt. The body reads 'data' and 'size' (number of pixels); the function is referenced
// elsewhere as FrameChannels::reverseRowPixelOrderInPlace<T, tChannels>.
3140template <
typename T,
unsigned int tChannels>
3143 static_assert(tChannels >= 1u,
"Invalid channel number!");
3145 ocean_assert(data !=
nullptr);
3146 ocean_assert(size >= 1);
// NEON fast path for element types mapped to uint8_t: per iteration 16 pixels from the left end are swapped
// with 16 pixels from the right end, so each iteration finishes 32 pixels.
3152#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3154 if constexpr (std::is_same<typename TypeMapper<T>::Type, uint8_t>::value)
3158 const size_t blocks32 = size /
size_t(32);
3160 uint8_t* left = (uint8_t*)(data);
// NOTE(review): for size < 16 the unsigned expression (size - 16u) wraps around; the loops below do not
// dereference 'right' when blocks32 == 0, but the pointer value itself is out of range - confirm whether
// a guard exists in the lines not visible here.
3161 uint8_t* right = (uint8_t*)(data) + (size - 16u) * tChannels;
// NOTE(review): the statements selecting one of the four loops below (presumably by tChannels) are not visible in this excerpt.
// Loop for 16 byte blocks (one byte per pixel): both blocks are fully reversed and written to the opposite side.
3167 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3169 const uint8x16_t left_u_8x16 = vld1q_u8(left);
3170 const uint8x16_t right_u_8x16 = vld1q_u8(right);
3172 uint8x16_t revLeft_u_8x16 = vrev64q_u8(left_u_8x16);
3173 revLeft_u_8x16 = vcombine_u8(vget_high_u8(revLeft_u_8x16), vget_low_u8(revLeft_u_8x16));
3175 uint8x16_t revRight_u_8x16 = vrev64q_u8(right_u_8x16);
3176 revRight_u_8x16 = vcombine_u8(vget_high_u8(revRight_u_8x16), vget_low_u8(revRight_u_8x16));
3178 vst1q_u8(left, revRight_u_8x16);
3179 vst1q_u8(right, revLeft_u_8x16);
3181 left += 16u * tChannels;
3182 right -= 16u * tChannels;
// The first blocks32 * 16 pixels (and their mirrored counterparts) are done; the scalar loop continues from here.
3185 n += blocks32 * 16u;
// Loop for 32 byte blocks (two bytes per pixel): vld2q_u8 de-interleaves the channels, each channel register
// is reversed, vst2q_u8 interleaves them again on the opposite side.
3192 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3194 const uint8x16x2_t left_u_8x16x2 = vld2q_u8(left);
3195 const uint8x16x2_t right_u_8x16x2 = vld2q_u8(right);
3197 uint8x16x2_t revLeft_u_8x16x2;
3198 revLeft_u_8x16x2.val[0] = vrev64q_u8(left_u_8x16x2.val[0]);
3199 revLeft_u_8x16x2.val[1] = vrev64q_u8(left_u_8x16x2.val[1]);
3200 revLeft_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[0]), vget_low_u8(revLeft_u_8x16x2.val[0]));
3201 revLeft_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[1]), vget_low_u8(revLeft_u_8x16x2.val[1]));
3203 uint8x16x2_t revRight_u_8x16x2;
3204 revRight_u_8x16x2.val[0] = vrev64q_u8(right_u_8x16x2.val[0]);
3205 revRight_u_8x16x2.val[1] = vrev64q_u8(right_u_8x16x2.val[1]);
3206 revRight_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[0]), vget_low_u8(revRight_u_8x16x2.val[0]));
3207 revRight_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[1]), vget_low_u8(revRight_u_8x16x2.val[1]));
3209 vst2q_u8(left, revRight_u_8x16x2);
3210 vst2q_u8(right, revLeft_u_8x16x2);
3212 left += 16u * tChannels;
3213 right -= 16u * tChannels;
3216 n += blocks32 * 16u;
// Loop for 48 byte blocks (three bytes per pixel), same scheme with vld3q_u8 / vst3q_u8.
3223 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3225 const uint8x16x3_t left_u_8x16x3 = vld3q_u8(left);
3226 const uint8x16x3_t right_u_8x16x3 = vld3q_u8(right);
3228 uint8x16x3_t revLeft_u_8x16x3;
3229 revLeft_u_8x16x3.val[0] = vrev64q_u8(left_u_8x16x3.val[0]);
3230 revLeft_u_8x16x3.val[1] = vrev64q_u8(left_u_8x16x3.val[1]);
3231 revLeft_u_8x16x3.val[2] = vrev64q_u8(left_u_8x16x3.val[2]);
3232 revLeft_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[0]), vget_low_u8(revLeft_u_8x16x3.val[0]));
3233 revLeft_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[1]), vget_low_u8(revLeft_u_8x16x3.val[1]));
3234 revLeft_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[2]), vget_low_u8(revLeft_u_8x16x3.val[2]));
3236 uint8x16x3_t revRight_u_8x16x3;
3237 revRight_u_8x16x3.val[0] = vrev64q_u8(right_u_8x16x3.val[0]);
3238 revRight_u_8x16x3.val[1] = vrev64q_u8(right_u_8x16x3.val[1]);
3239 revRight_u_8x16x3.val[2] = vrev64q_u8(right_u_8x16x3.val[2]);
3240 revRight_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[0]), vget_low_u8(revRight_u_8x16x3.val[0]));
3241 revRight_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[1]), vget_low_u8(revRight_u_8x16x3.val[1]));
3242 revRight_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[2]), vget_low_u8(revRight_u_8x16x3.val[2]));
3244 vst3q_u8(left, revRight_u_8x16x3);
3245 vst3q_u8(right, revLeft_u_8x16x3);
3247 left += 16u * tChannels;
3248 right -= 16u * tChannels;
3251 n += blocks32 * 16u;
// Loop for 64 byte blocks (four bytes per pixel), same scheme with vld4q_u8 / vst4q_u8.
3258 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3260 const uint8x16x4_t left_u_8x16x4 = vld4q_u8(left);
3261 const uint8x16x4_t right_u_8x16x4 = vld4q_u8(right);
3263 uint8x16x4_t revLeft_u_8x16x4;
3264 revLeft_u_8x16x4.val[0] = vrev64q_u8(left_u_8x16x4.val[0]);
3265 revLeft_u_8x16x4.val[1] = vrev64q_u8(left_u_8x16x4.val[1]);
3266 revLeft_u_8x16x4.val[2] = vrev64q_u8(left_u_8x16x4.val[2]);
3267 revLeft_u_8x16x4.val[3] = vrev64q_u8(left_u_8x16x4.val[3]);
3268 revLeft_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[0]), vget_low_u8(revLeft_u_8x16x4.val[0]));
3269 revLeft_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[1]), vget_low_u8(revLeft_u_8x16x4.val[1]));
3270 revLeft_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[2]), vget_low_u8(revLeft_u_8x16x4.val[2]));
3271 revLeft_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[3]), vget_low_u8(revLeft_u_8x16x4.val[3]));
3273 uint8x16x4_t revRight_u_8x16x4;
3274 revRight_u_8x16x4.val[0] = vrev64q_u8(right_u_8x16x4.val[0]);
3275 revRight_u_8x16x4.val[1] = vrev64q_u8(right_u_8x16x4.val[1]);
3276 revRight_u_8x16x4.val[2] = vrev64q_u8(right_u_8x16x4.val[2]);
3277 revRight_u_8x16x4.val[3] = vrev64q_u8(right_u_8x16x4.val[3]);
3278 revRight_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[0]), vget_low_u8(revRight_u_8x16x4.val[0]));
3279 revRight_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[1]), vget_low_u8(revRight_u_8x16x4.val[1]));
3280 revRight_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[2]), vget_low_u8(revRight_u_8x16x4.val[2]));
3281 revRight_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[3]), vget_low_u8(revRight_u_8x16x4.val[3]));
3283 vst4q_u8(left, revRight_u_8x16x4);
3284 vst4q_u8(right, revLeft_u_8x16x4);
3286 left += 16u * tChannels;
3287 right -= 16u * tChannels;
3290 n += blocks32 * 16u;
// Generic tail: swaps whole pixels n and (size - n - 1) until the middle of the row is reached.
// NOTE(review): 'PixelType' is presumably a type covering all tChannels elements of one pixel; the statement
// advancing 'n' inside the loop is not visible in this excerpt - confirm.
3303 PixelType intermediate;
3305 PixelType*
const pixels = (PixelType*)(data);
3307 while (n < size / 2)
3309 intermediate = pixels[n];
3311 pixels[n] = pixels[size - n - 1];
3312 pixels[size - n - 1] = intermediate;
// Copies 'size' pixels from source to target while reversing the channel order inside each pixel; the pixel order is kept.
// NOTE(review): the signature (original lines 3319-3320) is not visible in this excerpt. The body reads
// 'source', 'target' and 'size' (number of pixels); the function is referenced elsewhere as
// FrameChannels::reverseRowChannelOrder<T, tChannels>.
3318template <
typename T,
unsigned int tChannels>
3321 ocean_assert(source !=
nullptr && target !=
nullptr);
// Source and target must be different buffers.
3322 ocean_assert(source != target);
3323 ocean_assert(size >= 1);
// Bounds of both buffers, referenced only by the range checks inside ocean_assert() below.
3326 const T*
const debugSourceStart = source;
3327 const T*
const debugSourceEnd = debugSourceStart + size * tChannels;
3329 const T*
const debugTargetStart = target;
3330 const T*
const debugTargetEnd = debugTargetStart + size * tChannels;
// With one channel there is nothing to reverse, the row is copied as it is.
3333 if constexpr (tChannels == 1)
3337 memcpy(target, source,
sizeof(T) * size);
3341 const T*
const sourceEnd = source + size * tChannels;
// SSE 4.1 fast path, 16 pixels per iteration.
// NOTE(review): the condition guarding this section, the channel dispatch and the loop bodies (apart from
// the pointer increments) are not visible in this excerpt.
3343#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
3347 const size_t blocks16 = size /
size_t(16);
// One channel has already been handled by the memcpy above.
3352 ocean_assert(
false &&
"This should have been handled above!");
3357 for (
size_t n = 0; n < blocks16; ++n)
3361 source += 16u * tChannels;
3362 target += 16u * tChannels;
3370 for (
size_t n = 0; n < blocks16; ++n)
3374 source += 16u * tChannels;
3375 target += 16u * tChannels;
3383 for (
size_t n = 0; n < blocks16; ++n)
3387 source += 16u * tChannels;
3388 target += 16u * tChannels;
// NEON fast path, 16 pixels per iteration.
// NOTE(review): the condition guarding this section and the channel dispatch are not visible in this excerpt.
3399#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3403 const size_t blocks16 = size /
size_t(16);
// One channel has already been handled by the memcpy above.
3408 ocean_assert(
false &&
"This should have been handled above!");
// Loop for 32 bytes per iteration (two bytes per pixel): vrev16q_u8 swaps both bytes of every 16 bit unit.
3413 for (
size_t n = 0; n < blocks16; ++n)
3415 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3416 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3418 const uint8x16_t sourceA_u_8x16 = vld1q_u8((
const uint8_t*)source + 0);
3419 const uint8x16_t sourceB_u_8x16 = vld1q_u8((
const uint8_t*)source + 16);
3421 const uint8x16_t revSourceA_u_8x16 = vrev16q_u8(sourceA_u_8x16);
3422 const uint8x16_t revSourceB_u_8x16 = vrev16q_u8(sourceB_u_8x16);
3424 vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3425 vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3427 source += 16u * tChannels;
3428 target += 16u * tChannels;
// Loop for 48 bytes per iteration (three bytes per pixel): vld3q_u8 de-interleaves the channels,
// the first and the third channel register are exchanged, the middle one stays.
3436 for (
size_t n = 0; n < blocks16; ++n)
3438 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3439 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3441 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3443 uint8x16x3_t revSource_u_8x16x3;
3444 revSource_u_8x16x3.val[0] = source_u_8x16x3.val[2];
3445 revSource_u_8x16x3.val[1] = source_u_8x16x3.val[1];
3446 revSource_u_8x16x3.val[2] = source_u_8x16x3.val[0];
3448 vst3q_u8((uint8_t*)target, revSource_u_8x16x3);
3450 source += 16u * tChannels;
3451 target += 16u * tChannels;
// Loop for 64 bytes per iteration (four bytes per pixel): vrev32q_u8 reverses the four bytes of every 32 bit unit.
3459 for (
size_t n = 0; n < blocks16; ++n)
3461 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3462 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3464 const uint8x16_t sourceA_u_8x16 = vld1q_u8((
const uint8_t*)source + 0);
3465 const uint8x16_t sourceB_u_8x16 = vld1q_u8((
const uint8_t*)source + 16);
3466 const uint8x16_t sourceC_u_8x16 = vld1q_u8((
const uint8_t*)source + 32);
3467 const uint8x16_t sourceD_u_8x16 = vld1q_u8((
const uint8_t*)source + 48);
3469 const uint8x16_t revSourceA_u_8x16 = vrev32q_u8(sourceA_u_8x16);
3470 const uint8x16_t revSourceB_u_8x16 = vrev32q_u8(sourceB_u_8x16);
3471 const uint8x16_t revSourceC_u_8x16 = vrev32q_u8(sourceC_u_8x16);
3472 const uint8x16_t revSourceD_u_8x16 = vrev32q_u8(sourceD_u_8x16);
3474 vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3475 vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3476 vst1q_u8((uint8_t*)target + 32, revSourceC_u_8x16);
3477 vst1q_u8((uint8_t*)target + 48, revSourceD_u_8x16);
3479 source += 16u * tChannels;
3480 target += 16u * tChannels;
// Generic tail: handles all pixels not covered by a fast path, one pixel per iteration.
3493 while (source != sourceEnd)
3495 ocean_assert(source < sourceEnd);
3497 ocean_assert(source >= debugSourceStart && source + tChannels <= debugSourceEnd);
3498 ocean_assert(target >= debugTargetStart && target + tChannels <= debugTargetEnd);
3500 for (
unsigned int n = 0u; n < tChannels; ++n)
// Target channel n takes the mirrored source channel.
3502 target[n] = source[tChannels - n - 1u];
3505 source += tChannels;
3506 target += tChannels;
// Copies 'size' pixels from source to target while selecting/shuffling channels according to tShufflePattern:
// target channel n is read from source channel ((tShufflePattern >> (n * 4)) & 0xF), see the generic tail loop.
// NOTE(review): the signature (original lines 3511-3512) is not visible in this excerpt. The body reads
// 'source', 'target' and 'size' (number of pixels); presumably this is FrameChannels::shuffleRowChannels() - confirm.
3510template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
// Up to eight channels on each side, as the 32 bit pattern holds eight 4 bit entries.
3513 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
3514 static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u,
"Invalid channel number!");
// The combination of one source channel and one target channel is rejected.
3516 static_assert(tSourceChannels != 1u || tTargetChannels != 1u,
"Invalid channel number!");
// Every one of the eight nibbles must address an existing source channel, also those beyond tTargetChannels.
3518 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
3519 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
3520 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
3521 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
3522 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
3523 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
3524 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
3525 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
3527 ocean_assert(source !=
nullptr && target !=
nullptr);
3528 ocean_assert(size != 0);
3530 const T*
const sourceEnd = source + size * tSourceChannels;
// SSE 4.1 fast path, 16 pixels per iteration.
// NOTE(review): the condition guarding this section is not visible in this excerpt.
3532#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
3536 const size_t blocks16 = size /
size_t(16);
// The dispatch key combines both channel numbers: source channels in the low nibble, target channels in the next nibble.
3538 switch (tSourceChannels | ((tTargetChannels) << 4u))
// 4 source channels -> 4 target channels.
3541 case (4u | (4u << 4u)):
// Byte offsets of the second, third and fourth pixel inside a block of four 4-byte pixels.
3545 constexpr unsigned int offset1 = 0x04040404u;
3546 constexpr unsigned int offset2 = 0x08080808u;
3547 constexpr unsigned int offset3 = 0x0C0C0C0Cu;
// Spreads the four lowest nibbles of the pattern to four bytes: nibble k ends up in byte k.
3550 const unsigned int shufflePattern0 = ((tShufflePattern & 0xF000u) << 12u) | ((tShufflePattern & 0x0F00u) << 8u) | ((tShufflePattern & 0x00F0u) << 4u) | ((tShufflePattern & 0x000Fu) << 0u);
// The same byte pattern, shifted to the following three pixels.
3552 const unsigned int shufflePattern1 = shufflePattern0 + offset1;
3553 const unsigned int shufflePattern2 = shufflePattern0 + offset2;
3554 const unsigned int shufflePattern3 = shufflePattern0 + offset3;
// 128 bit byte-shuffle mask built from the four per-pixel patterns.
// NOTE(review): presumably SSE::set128i() takes the high 64 bit first and the low 64 bit second - confirm.
3556 const __m128i shufflePattern128 =
SSE::set128i((((
unsigned long long)shufflePattern3) << 32ull) | (
unsigned long long)shufflePattern2, (((
unsigned long long)shufflePattern1) << 32ull) | (
unsigned long long)shufflePattern0);
// NOTE(review): the loop body applying the mask (original lines 3559-3564) is not visible in this excerpt.
3558 for (
size_t n = 0; n < blocks16; ++n)
3565 source += 16u * tSourceChannels;
3566 target += 16u * tTargetChannels;
// NEON fast path, 16 pixels per iteration; the vldNq_u8/vstNq_u8 intrinsics (de-)interleave the channels,
// so shuffling reduces to picking the right channel register.
// NOTE(review): the condition guarding this section is not visible in this excerpt.
3578#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3582 const size_t blocks16 = size /
size_t(16);
// The dispatch key combines both channel numbers: source channels in the low nibble, target channels in the next nibble.
3584 switch (tSourceChannels | ((tTargetChannels) << 4u))
// 1 source channel -> 3 target channels: the single source channel is replicated.
3587 case (1u | (3u << 4u)):
// NOTE(review): the message below contains a typo ("patter" instead of "pattern").
3589 static_assert(tSourceChannels != 1u || tShufflePattern == 0u,
"Invalid shuffle patter!");
3591 for (
size_t n = 0; n < blocks16; ++n)
3593 const uint8x16_t source_u_8x16 = vld1q_u8((
const uint8_t*)source);
3595 uint8x16x3_t target_u_8x16x3;
3597 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3599 target_u_8x16x3.val[nT] = source_u_8x16;
3602 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3604 source += 16u * tSourceChannels;
3605 target += 16u * tTargetChannels;
// 2 source channels -> 1 target channel.
3612 case (2u | (1u << 4u)):
3614 for (
size_t n = 0; n < blocks16; ++n)
3616 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((
const uint8_t*)source);
// The index is masked to one bit so that it is always a valid index for val[2].
3618 constexpr unsigned int sourceChannel = tShufflePattern & 0x00000001u;
3619 static_assert(sourceChannel <= 1u,
"Invalid shuffle pattern!");
3620 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3622 const uint8x16_t target_u_8x16 = source_u_8x16x2.val[sourceChannel];
3624 vst1q_u8((uint8_t*)target, target_u_8x16);
3626 source += 16u * tSourceChannels;
3627 target += 16u * tTargetChannels;
// 2 source channels -> 3 target channels.
3634 case (2u | (3u << 4u)):
3636 for (
size_t n = 0; n < blocks16; ++n)
3638 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((
const uint8_t*)source);
3640 uint8x16x3_t target_u_8x16x3;
3642 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3644 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3646 target_u_8x16x3.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u];
3649 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3651 source += 16u * tSourceChannels;
3652 target += 16u * tTargetChannels;
// 2 source channels -> 4 target channels.
3659 case (2u | (4u << 4u)):
3661 for (
size_t n = 0; n < blocks16; ++n)
3663 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((
const uint8_t*)source);
3665 uint8x16x4_t target_u_8x16x4;
3667 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3669 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3671 target_u_8x16x4.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u];
3674 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3676 source += 16u * tSourceChannels;
3677 target += 16u * tTargetChannels;
// 3 source channels -> 1 target channel.
3684 case (3u | (1u << 4u)):
// The index is clamped to 2 so that it is always a valid index for val[3].
3686 constexpr unsigned int sourceChannel = (tShufflePattern & 0x0000000Fu) <= 2u ? (tShufflePattern & 0x0000000Fu) : 2u;
3687 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3689 for (
size_t n = 0; n < blocks16; ++n)
3691 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3693 const uint8x16_t target_u_8x16 = source_u_8x16x3.val[sourceChannel];
3695 vst1q_u8((uint8_t*)target, target_u_8x16);
3697 source += 16u * tSourceChannels;
3698 target += 16u * tTargetChannels;
// 3 source channels -> 2 target channels.
3705 case (3u | (2u << 4u)):
3707 for (
size_t n = 0; n < blocks16; ++n)
3709 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3711 uint8x16x2_t target_u_8x16x2;
3713 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
// std::min() clamps the index to 2 so that it is always a valid index for val[3].
3715 target_u_8x16x2.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)];
3718 vst2q_u8((uint8_t*)target, target_u_8x16x2);
3720 source += 16u * tSourceChannels;
3721 target += 16u * tTargetChannels;
// 3 source channels -> 3 target channels.
3728 case (3u | (3u << 4u)):
3730 for (
size_t n = 0; n < blocks16; ++n)
3732 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3734 uint8x16x3_t target_u_8x16x3;
3736 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
// std::min() clamps the index to 2 so that it is always a valid index for val[3].
3738 target_u_8x16x3.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)];
3741 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3743 source += 16u * tSourceChannels;
3744 target += 16u * tTargetChannels;
// 4 source channels -> 1 target channel.
3751 case (4u | (1u << 4u)):
3753 for (
size_t n = 0; n < blocks16; ++n)
3755 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
// The index is masked to two bits so that it is always a valid index for val[4].
3757 constexpr unsigned int sourceChannel = tShufflePattern & 0x00000003u;
3758 static_assert(sourceChannel <= 3u,
"Invalid shuffle pattern!");
3760 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3762 const uint8x16_t target_u_8x16 = source_u_8x16x4.val[sourceChannel];
3764 vst1q_u8((uint8_t*)target, target_u_8x16);
3766 source += 16u * tSourceChannels;
3767 target += 16u * tTargetChannels;
// 4 source channels -> 2 target channels.
3774 case (4u | (2u << 4u)):
3776 for (
size_t n = 0; n < blocks16; ++n)
3778 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3780 uint8x16x2_t target_u_8x16x2;
3782 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3784 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3786 target_u_8x16x2.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u];
3789 vst2q_u8((uint8_t*)target, target_u_8x16x2);
3791 source += 16u * tSourceChannels;
3792 target += 16u * tTargetChannels;
// 4 source channels -> 3 target channels.
3799 case (4u | (3u << 4u)):
3801 for (
size_t n = 0; n < blocks16; ++n)
3803 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3805 uint8x16x3_t target_u_8x16x3;
3807 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3809 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3811 target_u_8x16x3.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u];
3814 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3816 source += 16u * tSourceChannels;
3817 target += 16u * tTargetChannels;
// 4 source channels -> 4 target channels.
3824 case (4u | (4u << 4u)):
3826 for (
size_t n = 0; n < blocks16; ++n)
3828 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3830 uint8x16x4_t target_u_8x16x4;
3832 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3834 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3836 target_u_8x16x4.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u];
3839 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3841 source += 16u * tSourceChannels;
3842 target += 16u * tTargetChannels;
// Generic tail: handles all pixels not covered by a fast path, one pixel per iteration.
3856 while (source != sourceEnd)
3858 ocean_assert(source < sourceEnd);
3860 for (
unsigned int n = 0u; n < tTargetChannels; ++n)
// The n-th nibble of the pattern is the index of the source channel copied to target channel n.
3862 target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
3865 source += tSourceChannels;
3866 target += tTargetChannels;
// Row function: reorders the channels of every pixel according to tShufflePattern and writes one constant value into the last target channel.
// NOTE(review): the signature line is not visible in this excerpt; the name shuffleRowChannelsAndSetLastChannelValue is inferred from the caller that passes this template as its row function - confirm.
// tShufflePattern stores one 4-bit source channel index per target channel, lowest nibble first (see the scalar loop at the end).
// 'options' must point to a single value of type T; that value is used for the last channel of every target pixel.
3870template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
3873 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
3874 static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u,
"Invalid channel number!");
// all eight nibbles of the pattern are validated, also those beyond tTargetChannels, so unused nibbles must hold a valid index as well (e.g., 0)
3876 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
3877 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
3878 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
3879 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
3880 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
3881 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
3882 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
3883 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
3885 ocean_assert(source !=
nullptr && target !=
nullptr);
3886 ocean_assert(size != 0);
3888 ocean_assert(options !=
nullptr);
// the constant for the last target channel is read once from the opaque options pointer
3890 const T lastChannelValue = *(
const T*)(options);
// end of the source row; 'size' is the number of pixels, not elements
3892 const T*
const sourceEnd = source + size * tSourceChannels;
3894#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON path: handles 16 pixels per iteration, the remaining pixels are handled by the scalar loop below
// NOTE(review): the loads/stores below are 8-bit intrinsics on casted pointers; the guard restricting this path to 8-bit element types is not visible in this excerpt - confirm
3898 const size_t blocks16 = size /
size_t(16);
// the switch key combines the source channels (low nibble) with the target channels (next nibble)
3900 switch (tSourceChannels | ((tTargetChannels) << 4u))
// 1 source channel -> 4 target channels: the single source plane is replicated into the first three target planes
3903 case (1u | (4u << 4u)):
3905 ocean_assert(tShufflePattern == 0u);
3907 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
// the fourth plane is constant, so it is set once outside of the loop
3909 uint8x16x4_t target_u_8x16x4;
3910 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3912 for (
size_t n = 0; n < blocks16; ++n)
3914 const uint8x16_t source_u_8x16 = vld1q_u8((
const uint8_t*)source);
3916 for (
unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3918 target_u_8x16x4.val[nT] = source_u_8x16;
3921 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3923 source += 16u * tSourceChannels;
3924 target += 16u * tTargetChannels;
// 3 source channels -> 4 target channels: de-interleaved load, plane selection by pattern, interleaved store
3931 case (3u | (4u << 4u)):
3933 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3935 uint8x16x4_t target_u_8x16x4;
3936 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3938 for (
size_t n = 0; n < blocks16; ++n)
3940 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3942 for (
unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
// std::min keeps the plane index within the three loaded planes; presumably needed because this case is also compiled for other channel combinations - confirm
3944 target_u_8x16x4.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)];
3947 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3949 source += 16u * tSourceChannels;
3950 target += 16u * tTargetChannels;
// 4 source channels -> 4 target channels: the first three target planes are selected by the pattern, the fourth holds the constant
3957 case (4u | (4u << 4u)):
3959 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3961 uint8x16x4_t target_u_8x16x4;
3962 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3964 for (
size_t n = 0; n < blocks16; ++n)
3966 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3968 for (
unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3970 target_u_8x16x4.val[nT] = source_u_8x16x4.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 3u)];
3973 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3975 source += 16u * tSourceChannels;
3976 target += 16u * tTargetChannels;
// scalar path: handles all pixels not consumed above, one pixel per iteration
3990 while (source != sourceEnd)
3992 ocean_assert(source < sourceEnd);
// the first tTargetChannels - 1 channels are taken from the source as defined by the pattern nibbles
3994 for (
unsigned int n = 0u; n < tTargetChannels - 1u; ++n)
3996 target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
// the last channel always receives the constant value
3997 target[tTargetChannels - 1u] = lastChannelValue;
4000 source += tSourceChannels;
4001 target += tTargetChannels;
// Frame-level channel shuffle: derives the row strides and forwards the whole frame to FrameConverter::convertGenericPixelFormat,
// with shuffleRowChannels as the per-row conversion function.
// NOTE(review): the signature line is not visible in this excerpt; the function name is presumably shuffleChannels - confirm.
4005template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
4008 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
4009 static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u,
"Invalid channel number!");
// a conversion from one channel to one channel is rejected at compile time
4011 static_assert(tSourceChannels != 1u || tTargetChannels != 1u,
"Invalid channel number!");
// every nibble of the pattern must address an existing source channel, also the nibbles beyond tTargetChannels
4013 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
4014 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
4015 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
4016 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
4017 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
4018 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
4019 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
4020 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
4022 ocean_assert(source !=
nullptr && target !=
nullptr);
4023 ocean_assert(width >= 1u && height >= 1u);
// strides are given in elements (not bytes) and include the optional padding at the end of each row
4025 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
4026 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
// true only if neither frame has padding elements; the flag is handed to the converter (presumably to allow handling the frame as one block - confirm)
4028 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// no options are needed for the plain shuffle (nullptr); reverseRowPixelOrderInPlace is provided as second row function, presumably for mirrored conversion flags - confirm
4030 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous,
nullptr, worker);
// Frame-level channel shuffle which additionally sets the last target channel to a constant value.
// Forwards to FrameConverter::convertGenericPixelFormat with shuffleRowChannelsAndSetLastChannelValue as the per-row function.
// NOTE(review): the signature line is not visible in this excerpt; the function name is presumably shuffleChannelsAndSetLastChannelValue - confirm.
4033template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
4036 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
// at least two target channels are required
4037 static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u,
"Invalid channel number!");
// every nibble of the pattern must address an existing source channel, also the nibbles beyond tTargetChannels
4039 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
4040 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
4041 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
4042 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
4043 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
4044 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
4045 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
4046 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
4048 ocean_assert(source !=
nullptr && target !=
nullptr);
4049 ocean_assert(width >= 1u && height >= 1u);
// strides are given in elements (not bytes) and include the optional padding at the end of each row
4051 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
4052 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
4054 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// a local copy of the new channel value, its address is handed to the row function as opaque options pointer
4056 const T options = newChannelValue;
4058 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannelsAndSetLastChannelValue<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous, &options, worker);
// Frame-level conversion from 16 bit per channel to 8 bit per channel.
// Forwards to FrameConverter::convertGenericPixelFormat with narrowRow16BitPerChannelTo8BitPerChannel as the per-row function.
// NOTE(review): the signature line is not visible in this excerpt; the function name is presumably narrow16BitPerChannelTo8BitPerChannel - confirm.
4061template <
unsigned int tChannels>
4064 static_assert(tChannels >= 1u,
"Invalid channel number!");
4066 ocean_assert(source !=
nullptr && target !=
nullptr);
4067 ocean_assert(width >= 1u && height >= 1u);
// source and target have the same number of channels, so both strides differ only by their padding
4069 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
4070 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
4072 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// the row function needs no options (nullptr); the in-place row reversal is instantiated for the 8-bit target
4074 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::narrowRow16BitPerChannelTo8BitPerChannel<tChannels>, FrameChannels::reverseRowPixelOrderInPlace<uint8_t, tChannels>, areContinuous,
nullptr, worker);
// Applies the compile-time pixel function tPixelFunction to every pixel of a frame, either distributed via a worker or directly.
// NOTE(review): the signature line and the condition selecting between the two calls below are not visible in this excerpt;
// presumably the worker call is used whenever a worker is provided and the direct call otherwise - confirm.
4077template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
4080 static_assert(tChannels > 0u,
"Invalid channel number!");
4082 ocean_assert(source && target);
4083 ocean_assert(width != 0u && height != 0u);
// multi-core execution: the two trailing 0u arguments are placeholders for firstRow/numberRows, the row range to distribute is [0, height)
4087 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyPixelModifierSubset<T, tChannels, tPixelFunction>, source, target, width, height, conversionFlag, 0u, 0u), 0u, height);
// single-core execution: all rows are handled with one call
4091 applyPixelModifierSubset<T, tChannels, tPixelFunction>(source, target, width, height, conversionFlag, 0u, height);
// Applies the compile-time pixel function tPixelFunction to every pixel of a frame; source and target may differ in element type,
// channel number and padding. The work is done either via a worker or directly.
// NOTE(review): the signature line and the condition selecting between the two calls below are not visible in this excerpt;
// presumably the worker call is used whenever a worker is provided and the direct call otherwise - confirm.
4095template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
4098 static_assert(tSourceChannels > 0u,
"Invalid source channel number!");
4099 static_assert(tTargetChannels > 0u,
"Invalid target channel number!");
4101 ocean_assert(source && target);
4102 ocean_assert(width != 0u && height != 0u);
// multi-core execution: the two trailing 0u arguments are placeholders for firstRow/numberRows, the row range to distribute is [0, height)
4106 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>, source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
// single-core execution: all rows are handled with one call
4110 applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>(source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, height);
// Applies the compile-time operator tOperator, which receives one pixel of each of the two source frames and writes one target pixel, to an entire frame.
// Both source frames share tSourceChannels channels but may have individual element types and paddings.
// The work is forwarded to applyBivariateOperatorSubset, either distributed via the worker or with one direct call.
// NOTE(review): the condition selecting between the two calls is not visible in this excerpt; presumably it depends on whether a worker is provided - confirm.
4114template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
4115void FrameChannels::applyBivariateOperator(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
Worker* worker)
4117 static_assert(tSourceChannels > 0u,
"Invalid source channel number!");
4118 static_assert(tTargetChannels > 0u,
"Invalid target channel number!");
4120 ocean_assert(source0 && source1 && target);
4121 ocean_assert(width != 0u && height != 0u);
// multi-core execution: the two trailing 0u arguments are placeholders for firstRow/numberRows, the row range to distribute is [0, height)
4125 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>, source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
// single-core execution: all rows are handled with one call
4129 FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>(source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, height);
// Applies a row operator, provided at runtime as function pointer, to all rows of a frame.
// The paddings are converted to strides (in elements) and the work is forwarded to applyRowOperatorSubset,
// either distributed via the worker or with one direct call.
// NOTE(review): the condition selecting between the two calls is not visible in this excerpt; presumably it depends on whether a worker is provided - confirm.
4133template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
4134void FrameChannels::applyRowOperator(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction,
Worker* worker)
4136 static_assert(tSourceChannels > 0u,
"Invalid source channel number!");
4137 static_assert(tTargetChannels > 0u,
"Invalid target channel number!");
4139 ocean_assert(source !=
nullptr && target !=
nullptr);
4140 ocean_assert(width != 0u && height != 0u);
// strides are given in elements (not bytes) and include the optional padding at the end of each row
4142 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
4143 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
// multi-core execution: the two trailing 0u arguments are placeholders for firstRow/numberRows, the row range to distribute is [0, height)
4147 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>, source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, 0u), 0u, height);
// single-core execution: all rows are handled with one call
4151 applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>(source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, height);
// Frame-level transformation without any per-pixel conversion: the frame is handed to transformGenericSubset as raw bytes together with
// the conversion flag and a row function reversing the pixel order.
// NOTE(review): the signature line and the definition of rowReversePixelOrderFunction are not visible in this excerpt; the function name is presumably transformGeneric - confirm.
4155template <
typename T,
unsigned int tChannels>
4158 ocean_assert(source !=
nullptr && target !=
nullptr);
4159 ocean_assert(width >= 1u && height >= 1u);
// the subset function works on bytes, therefore all sizes are converted from elements to bytes
4161 const unsigned int bytesPerRow = width *
sizeof(T) * tChannels;
4163 const unsigned int sourceStrideBytes = width *
sizeof(T) * tChannels +
sizeof(T) * sourcePaddingElements;
4164 const unsigned int targetStrideBytes = width *
sizeof(T) * tChannels +
sizeof(T) * targetPaddingElements;
// the worker is used only for frames with more than 200 rows
4170 if (worker && height > 200u)
// 9u and 10u are the positions of the two 0u placeholders (firstRow/numberRows) in the bound argument list; 20u is presumably the minimal number of rows per thread - confirm against Worker::executeFunction
4172 worker->
executeFunction(
Worker::Function::createStatic(&
FrameChannels::transformGenericSubset, (
const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, 0u), 0u, height, 9u, 10u, 20u);
// single-core execution: all rows are handled with one call
4176 transformGenericSubset((
const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, height);
// In-place dispatcher converting a frame with premultiplied alpha to straight alpha (8 bit per channel);
// the actual conversion is done in premultipliedAlphaToStraightAlpha8BitPerChannelSubset.
// NOTE(review): the signature line is not visible in this excerpt; the name is inferred from the subset function - confirm.
4180template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// at least one data channel next to the alpha channel is necessary
4183 static_assert(tChannels >= 2u,
"Invalid channel number!");
4184 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4186 ocean_assert(frame !=
nullptr);
4187 ocean_assert(width >= 1u && height >= 1u);
// the worker is used only for frames with more than 200 rows
4189 if (worker && height > 200u)
// 3u and 4u are the positions of the two 0u placeholders (firstRow/numberRows) in the bound argument list; 20u is presumably the minimal number of rows per thread - confirm
4191 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
// single-core execution: all rows are handled with one call
4195 premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
// Dispatcher converting a source frame with premultiplied alpha to a target frame with straight alpha (8 bit per channel);
// the actual conversion is done in premultipliedAlphaToStraightAlpha8BitPerChannelSubset.
// NOTE(review): the signature line is not visible in this excerpt; the name is inferred from the subset function - confirm.
4199template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// at least one data channel next to the alpha channel is necessary
4202 static_assert(tChannels >= 2u,
"Invalid channel number!");
4203 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4205 ocean_assert(source !=
nullptr && target !=
nullptr);
4206 ocean_assert(width >= 1u && height >= 1u);
// the worker is used only for frames with more than 200 rows
4208 if (worker && height > 200u)
// 5u and 6u are the positions of the two 0u placeholders (firstRow/numberRows) in the bound argument list; 20u is presumably the minimal number of rows per thread - confirm
4210 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
// single-core execution: all rows are handled with one call
4214 premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
// In-place dispatcher converting a frame with straight alpha to premultiplied alpha (8 bit per channel);
// the actual conversion is done in straightAlphaToPremultipliedAlpha8BitPerChannelSubset.
// NOTE(review): the signature line is not visible in this excerpt; the name is inferred from the subset function - confirm.
4218template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// at least one data channel next to the alpha channel is necessary
4221 static_assert(tChannels >= 2u,
"Invalid channel number!");
4222 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4224 ocean_assert(frame !=
nullptr);
4225 ocean_assert(width >= 1u && height >= 1u);
// the worker is used only for frames with more than 200 rows
4227 if (worker && height > 200u)
// 3u and 4u are the positions of the two 0u placeholders (firstRow/numberRows) in the bound argument list; 20u is presumably the minimal number of rows per thread - confirm
4229 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
// single-core execution: all rows are handled with one call
4233 straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
// Dispatcher converting a source frame with straight alpha to a target frame with premultiplied alpha (8 bit per channel);
// the actual conversion is done in straightAlphaToPremultipliedAlpha8BitPerChannelSubset.
// NOTE(review): the signature line is not visible in this excerpt; the name is inferred from the subset function - confirm.
4237template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// at least one data channel next to the alpha channel is necessary
4240 static_assert(tChannels >= 2u,
"Invalid channel number!");
4241 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4243 ocean_assert(source !=
nullptr && target !=
nullptr);
4244 ocean_assert(width >= 1u && height >= 1u);
// the worker is used only for frames with more than 200 rows
4246 if (worker && height > 200u)
// 5u and 6u are the positions of the two 0u placeholders (firstRow/numberRows) in the bound argument list; 20u is presumably the minimal number of rows per thread - confirm
4248 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
// single-core execution: all rows are handled with one call
4252 straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
// Row function narrowing 16 bit per channel to 8 bit per channel by keeping the upper 8 bits of each element.
// 'size' is the number of pixels in the row, each pixel has tChannels elements.
// NOTE(review): the signature line is not visible in this excerpt; the name narrowRow16BitPerChannelTo8BitPerChannel is inferred from the frame-level caller - confirm.
4256template <
unsigned int tChannels>
4259 static_assert(tChannels >= 1u,
"Invalid channel number!");
4261 ocean_assert(source !=
nullptr && target !=
nullptr);
4262 ocean_assert(size > 0);
4264 const uint16_t*
const sourceEnd = source + size * tChannels;
4266#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON path: 8 pixels per iteration
4268 const size_t blocks8 = size /
size_t(8);
// each iteration reads 32 elements but advances by 8 * tChannels elements, which matches for tChannels == 4 only
// NOTE(review): the guard restricting this loop to 4 channels is not visible in this excerpt - confirm it exists
4274 for (
size_t n = 0; n < blocks8; ++n)
4276 const uint16x8_t sourceA_u_16x8 = vld1q_u16(source + 0);
4277 const uint16x8_t sourceB_u_16x8 = vld1q_u16(source + 8);
4278 const uint16x8_t sourceC_u_16x8 = vld1q_u16(source + 16);
4279 const uint16x8_t sourceD_u_16x8 = vld1q_u16(source + 24);
// NOTE(review): vqrshrn_n_u16 is the rounding (and saturating) narrowing right shift, while the scalar loop below truncates with '>> 8';
// results of both paths can differ by one for the same input value - confirm which behavior is intended
4281 const uint8x16_t targetAB_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceA_u_16x8, 8), vqrshrn_n_u16(sourceB_u_16x8, 8));
4282 const uint8x16_t targetCD_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceC_u_16x8, 8), vqrshrn_n_u16(sourceD_u_16x8, 8));
4284 vst1q_u8(target + 0, targetAB_u_8x16);
4285 vst1q_u8(target + 16, targetCD_u_8x16);
4287 source += 8u * tChannels;
4288 target += 8u * tChannels;
// scalar path: handles all pixels not consumed above, one pixel per iteration
4300 while (source != sourceEnd)
4302 ocean_assert(source < sourceEnd);
4304 for (
unsigned int n = 0u; n < tChannels; ++n)
4306 ocean_assert((uint16_t)(source[n] >> 8u) <= 255u);
// the upper byte of the 16-bit value is kept, the lower byte is dropped without rounding
4307 target[n] = (uint8_t)(source[n] >> 8u);
4310 source += tChannels;
4311 target += tChannels;
// Multi-source row function merging one row of a frame with tSourceChannels channels and one row of a 1-channel frame
// into a target row with tSourceChannels + 1 channels.
// With tAddToFront the extra channel becomes the first target channel, otherwise the last one.
// sources[0] is the multi-channel frame, sources[1] the 1-channel frame, targets[0] the target frame;
// 'options' points to three unsigned int values: the padding elements of source, 1-channel source, and target (in this order).
// NOTE(review): the signature line and the definitions of flipTarget/mirrorTarget are not visible in this excerpt
// (presumably they are derived from the conversion flag); the name of this function is not visible either - confirm.
4315template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
4318 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
4319 static_assert(
sizeof(
size_t) ==
sizeof(
const T*),
"Invalid pointer size!");
4321 ocean_assert(sources !=
nullptr && targets !=
nullptr);
4322 ocean_assert(width != 0u && height != 0u);
4323 ocean_assert(multipleRowIndex < height);
4324 ocean_assert(options !=
nullptr);
// the frame pointers are provided as opaque entries and need to be casted back to their element type
4326 const T* source = (
const T*)(sources[0]);
4327 const T* sourceOneChannel = (
const T*)(sources[1]);
4328 ocean_assert(source !=
nullptr && sourceOneChannel !=
nullptr);
4330 T* target = (T*)(targets[0]);
4331 ocean_assert(target !=
nullptr);
4333 const unsigned int* uintOptions = (
const unsigned int*)options;
4334 ocean_assert(uintOptions !=
nullptr);
4336 const unsigned int sourcePaddingElements = uintOptions[0];
4337 const unsigned int sourceOneChannelPaddingElements = uintOptions[1];
4338 const unsigned int targetPaddingElements = uintOptions[2];
4340 const unsigned int targetChannels = tSourceChannels + 1u;
// strides in elements, including the padding at the end of each row
4342 const unsigned int sourceStrideElements = tSourceChannels * width + sourcePaddingElements;
4343 const unsigned int sourceOneChannelStrideElements = width + sourceOneChannelPaddingElements;
4344 const unsigned int targetStrideElements = targetChannels * width + targetPaddingElements;
// both source rows are always read top-down; for a flipped target the row is written to the vertically opposite row
4349 const T* sourceRow = source + sourceStrideElements * multipleRowIndex;
4350 const T* sourceOneChannelRow = sourceOneChannel + sourceOneChannelStrideElements * multipleRowIndex;
4351 T* targetRow = flipTarget ? target + targetStrideElements * (height - multipleRowIndex - 1u) : target + targetStrideElements * multipleRowIndex;
// not mirrored: target pixels are written from left to right
4353 if (mirrorTarget ==
false)
4355 for (
unsigned int n = 0u; n < width; ++n)
4357 if constexpr (tAddToFront)
// extra channel first, followed by all source channels
4359 targetRow[0] = sourceOneChannelRow[0];
4361 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4363 targetRow[c + 1u] = sourceRow[c];
// otherwise (the 'else' line is not visible in this excerpt): all source channels first, followed by the extra channel
4368 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4370 targetRow[c] = sourceRow[c];
4373 targetRow[tSourceChannels] = sourceOneChannelRow[0];
4376 sourceRow += tSourceChannels;
4377 sourceOneChannelRow++;
4379 targetRow += targetChannels;
// mirrored: writing starts at the last target pixel of the row and moves backwards while the sources are read forwards
4384 targetRow += targetChannels * (width - 1u);
4386 for (
unsigned int n = 0u; n < width; ++n)
4388 if constexpr (tAddToFront)
4390 targetRow[0] = sourceOneChannelRow[0];
4392 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4394 targetRow[c + 1u] = sourceRow[c];
4399 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4401 targetRow[c] = sourceRow[c];
4404 targetRow[tSourceChannels] = sourceOneChannelRow[0];
4407 sourceRow += tSourceChannels;
4408 sourceOneChannelRow++;
4410 targetRow -= targetChannels;
// Row function extending every pixel by one channel holding a constant value.
// With tAddToFront the constant becomes the first target channel, otherwise the last one; the source channels keep their order.
// 'channelValueParameter' must point to one value of type T, 'size' is the number of pixels in the row.
// NOTE(review): the signature line (and therefore the function name) is not visible in this excerpt - confirm.
4415template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
4418 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
4420 ocean_assert(source !=
nullptr && target !=
nullptr);
4421 ocean_assert(size > 0);
4422 ocean_assert(channelValueParameter !=
nullptr);
// the constant value is accessed through the opaque parameter pointer
4424 const T& channelValue = *((
const T*)channelValueParameter);
4426 const unsigned int targetChannels = tSourceChannels + 1u;
4428 for (
size_t n = 0; n < size; ++n)
4430 if constexpr (tAddToFront)
// constant first, followed by all source channels
4432 target[0] = channelValue;
4434 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4436 target[c + 1u] = source[c];
// otherwise (the 'else' line is not visible in this excerpt): all source channels first, followed by the constant
4441 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4443 target[c] = source[c];
4446 target[tSourceChannels] = channelValue;
4449 source += tSourceChannels;
4450 target += targetChannels;
// Row function copying one channel (tSourceChannelIndex) of each source pixel to one channel (tTargetChannelIndex) of the corresponding target pixel.
// All other target channels are not touched; 'size' is the number of pixels in the row.
// NOTE(review): the signature line (and therefore the function name) is not visible in this excerpt - confirm.
4454template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
4457 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
4458 static_assert(tTargetChannels != 0u,
"Invalid channel number!");
// both channel indices must address an existing channel
4460 static_assert(tSourceChannelIndex < tSourceChannels,
"Invalid channel number!");
4461 static_assert(tTargetChannelIndex < tTargetChannels,
"Invalid channel number!");
4463 ocean_assert(source !=
nullptr && target !=
nullptr);
4464 ocean_assert(size > 0);
4466 for (
size_t n = 0; n < size; ++n)
4468 target[tTargetChannelIndex] = source[tSourceChannelIndex];
// both pointers advance by one pixel
4470 source += tSourceChannels;
4471 target += tTargetChannels;
// Separates an interleaved frame into 'channels' individual 1-channel frames, with the channel number known at runtime only.
// Each element is converted with an explicit TTarget(...) cast.
// sourceFrame: interleaved source frame, targetFrames: one target pointer per channel,
// sourceFramePaddingElements: padding at the end of each source row (in elements),
// targetFramesPaddingElements: one padding value per target frame, or nullptr if all target frames are continuous.
4475template <
typename TSource,
typename TTarget>
4476void FrameChannels::separateTo1ChannelRuntime(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
4478 ocean_assert(sourceFrame !=
nullptr);
4479 ocean_assert(targetFrames !=
nullptr);
4481 ocean_assert(width != 0u && height != 0u);
4482 ocean_assert(channels != 0u);
// sanity check: every target frame must be valid
4485 for (
unsigned int c = 0u; c < channels; ++c)
4487 ocean_assert(targetFrames[c] !=
nullptr);
// case 1: no padding at all, source and targets are handled as one continuous block of width * height pixels
4491 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements ==
nullptr)
4493 for (
unsigned int n = 0u; n < width * height; ++n)
4495 for (
unsigned int c = 0u; c < channels; ++c)
4497 targetFrames[c][n] = TTarget(sourceFrame[n * channels + c]);
// case 2: only the source frame has padding, the target frames are continuous
4501 else if (targetFramesPaddingElements ==
nullptr)
4503 ocean_assert(sourceFramePaddingElements != 0u);
4505 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4507 for (
unsigned int y = 0u; y < height; ++y)
4509 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
// the targets have no padding, so the row offset is identical for all target frames
4511 const unsigned int targetRowOffset = y * width;
4513 for (
unsigned int x = 0u; x < width; ++x)
4515 for (
unsigned int c = 0u; c < channels; ++c)
4517 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * channels + c));
// case 3 (remaining case, the 'else' line is not visible in this excerpt): the target frames have individual paddings
4524 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
// one stride per target frame
4526 Indices32 targetFrameStrideElements(channels);
4528 for (
unsigned int c = 0u; c < channels; ++c)
4530 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
4533 for (
unsigned int y = 0u; y < height; ++y)
4535 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4537 for (
unsigned int x = 0u; x < width; ++x)
4539 for (
unsigned int c = 0u; c < channels; ++c)
4541 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * channels + c));
// Zips 'channels' individual 1-channel frames into one interleaved frame, with the channel number known at runtime only.
// Each element is converted with an explicit TTarget(...) cast.
// sourceFrames: one source pointer per channel, targetFrame: interleaved target frame,
// sourceFramesPaddingElements: one padding value per source frame, or nullptr if all source frames are continuous,
// targetFramePaddingElements: padding at the end of each target row (in elements).
4548template <
typename TSource,
typename TTarget>
4549void FrameChannels::zipChannelsRuntime(
const TSource*
const* sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
4551 ocean_assert(sourceFrames !=
nullptr);
4552 ocean_assert(targetFrame !=
nullptr);
4554 ocean_assert(width != 0u && height != 0u);
4555 ocean_assert(channels != 0u);
// a missing padding array means that all source frames are continuous; otherwise every entry needs to be zero
4557 bool allSourceFramesContinuous =
true;
4559 if (sourceFramesPaddingElements !=
nullptr)
4561 for (
unsigned int n = 0u; n < channels; ++n)
4563 if (sourceFramesPaddingElements[n] != 0u)
4565 allSourceFramesContinuous =
false;
// fast path: no padding at all, sources and target are handled as one continuous block of width * height pixels
4571 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
4573 for (
unsigned int n = 0u; n < width * height; ++n)
4575 for (
unsigned int c = 0u; c < channels; ++c)
4577 targetFrame[n * channels + c] = TTarget(sourceFrames[c][n]);
// general path (the 'else' line is not visible in this excerpt): row-wise handling with individual strides
4583 const unsigned int targetFrameStrideElements = width * channels + targetFramePaddingElements;
// one stride per source frame
4585 Indices32 sourceFrameStrideElements(channels);
4587 for (
unsigned int c = 0u; c < channels; ++c)
4589 if (sourceFramesPaddingElements ==
nullptr)
4591 sourceFrameStrideElements[c] = width;
4595 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
4599 for (
unsigned int y = 0u; y < height; ++y)
4601 TTarget*
const targetRow = targetFrame + y * targetFrameStrideElements;
4603 for (
unsigned int x = 0u; x < width; ++x)
4605 for (
unsigned int c = 0u; c < channels; ++c)
4607 *(targetRow + x * channels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
// Sets one channel (tChannel) of every pixel within a subset of rows to a constant value; all other channels stay untouched.
// frame: interleaved frame with tChannels channels, width: frame width in pixels, value: the value to set,
// framePaddingElements: padding at the end of each row (in elements),
// firstRow/numberRows: the range of rows to handle.
4614template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
4615void FrameChannels::setChannelSubset(T* frame,
const unsigned int width,
const T value,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows)
4617 static_assert(tChannels >= 1u,
"Invalid channel number!");
4618 static_assert(tChannel < tChannels,
"Invalid channel index!");
4620 ocean_assert(frame !=
nullptr);
4622 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
// the pointer is moved to the channel of interest in the first pixel of the first row to handle
4624 frame += firstRow * frameStrideElements + tChannel;
4626 for (
unsigned int n = 0u; n < numberRows; ++n)
4628 for (
unsigned int x = 0u; x < width; ++x)
// stepping by tChannels elements addresses the same channel in each pixel
4630 frame[x * tChannels] = value;
4633 frame += frameStrideElements;
// Applies tPixelFunction to every pixel within a subset of rows; the conversion flag defines where each result is written in the target.
// This function uses width * tChannels as row stride for both frames, so frames with padding elements are not supported here.
// NOTE(review): the signature line and the individual case labels of the switch are not visible in this excerpt; the name applyPixelModifierSubset
// is inferred from the dispatcher, and the four variants below are described by their pointer arithmetic only.
4637template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
4640 static_assert(tChannels >= 1u,
"Invalid channel number");
4642 ocean_assert(source && target);
4643 ocean_assert(source != target);
4645 ocean_assert(numberRows > 0u);
4646 ocean_assert(firstRow + numberRows <= height);
4648 const unsigned int widthElements = width * tChannels;
4649 const unsigned int targetBlockSize = widthElements * numberRows;
4651 switch (conversionFlag)
// variant 1: identical pixel order, source and target are traversed as one continuous block
4655 source += firstRow * widthElements;
4656 target += firstRow * widthElements;
4658 const T*
const targetEnd = target + targetBlockSize;
4660 while (target != targetEnd)
4662 tPixelFunction(source, target);
4664 source += tChannels;
4665 target += tChannels;
// variant 2: rows are written in reversed vertical order (source row 'firstRow' goes to target row 'height - firstRow - 1'), pixels within a row keep their order
4673 source += firstRow * widthElements;
4674 target += width * height * tChannels - (firstRow + 1u) * widthElements;
4676 const T*
const targetEnd = target - targetBlockSize;
4678 while (target != targetEnd)
4680 const T*
const targetRowEnd = target + widthElements;
4682 while (target != targetRowEnd)
4684 tPixelFunction(source, target);
4686 source += tChannels;
4687 target += tChannels;
// the target is now at the end of the row just written; two rows back is the start of the previous row
4690 target -= (widthElements << 1);
// variant 3: rows keep their vertical position, pixels within each row are written from right to left
4698 source += firstRow * widthElements;
4699 target += (firstRow + 1u) * widthElements;
4701 const T*
const targetEnd = target + targetBlockSize;
4703 while (target != targetEnd)
4705 const T*
const targetRowEnd = target - widthElements;
4707 while (target != targetRowEnd)
// the target pointer is decremented before the pixel is written
4709 tPixelFunction(source, target -= tChannels);
4711 source += tChannels;
// the target is now at the start of the row just written; two rows forward is the end of the next row
4714 target += widthElements << 1;
// variant 4: reversed vertical order and reversed pixel order, i.e., the target block is written completely backwards
4722 source += firstRow * widthElements;
4723 target += width * height * tChannels - firstRow * widthElements;
4725 const T*
const targetEnd = target - targetBlockSize;
4727 while (target != targetEnd)
4729 tPixelFunction(source, target -= tChannels);
4731 source += tChannels;
// Applies tPixelFunction to every pixel within a subset of rows; source and target may differ in element type, channel number and padding.
// The conversion flag defines where each result is written in the target frame.
// source/target: the frames, width/height: frame dimension in pixels,
// sourcePaddingElements/targetPaddingElements: padding at the end of each row (in elements),
// firstRow/numberRows: the range of source rows to handle.
// NOTE(review): the individual case labels of the switch are not visible in this excerpt; the four variants below are described by their pointer arithmetic only.
4742template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
4743void FrameChannels::applyAdvancedPixelModifierSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows)
4745 static_assert(tSourceChannels >= 1u,
"Invalid source channel number");
4746 static_assert(tTargetChannels >= 1u,
"Invalid target channel number");
4748 ocean_assert(source && target);
// in-place application is not supported
4749 ocean_assert((
void*)source != (
void*)target);
4751 ocean_assert(numberRows != 0u);
4752 ocean_assert(firstRow + numberRows <= height);
4754 const unsigned int sourceWidthElements = width * tSourceChannels;
4755 const unsigned int targetWidthElements = width * tTargetChannels;
// strides in elements, including the padding at the end of each row
4757 const unsigned int sourceStrideElements = sourceWidthElements + sourcePaddingElements;
4758 const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4760 switch (conversionFlag)
// variant 1: identical row and pixel order
4764 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4766 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4767 TTarget* targetPixel = target + rowIndex * targetStrideElements;
4769 for (
unsigned int x = 0u; x < width; ++x)
4771 tPixelFunction(sourcePixel, targetPixel);
4773 sourcePixel += tSourceChannels;
4774 targetPixel += tTargetChannels;
// variant 2: source row 'rowIndex' is written to target row 'height - rowIndex - 1', pixels within a row keep their order
4783 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4785 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4786 TTarget* targetPixel = target + (height - rowIndex - 1u) * targetStrideElements;
4788 for (
unsigned int x = 0u; x < width; ++x)
4790 tPixelFunction(sourcePixel, targetPixel);
4792 sourcePixel += tSourceChannels;
4793 targetPixel += tTargetChannels;
// variant 3: rows keep their vertical position, pixels within each row are written from right to left
4802 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4804 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4806 TTarget*
const targetRowBegin = target + rowIndex * targetStrideElements;
// writing starts at the last pixel of the target row
4807 TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4809 for (
unsigned int x = 0u; x < width; ++x)
4811 ocean_assert(targetPixel >= targetRowBegin);
4812 tPixelFunction(sourcePixel, targetPixel);
4814 sourcePixel += tSourceChannels;
4815 targetPixel -= tTargetChannels;
// variant 4: reversed vertical order combined with reversed pixel order within each row
4824 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4826 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4828 TTarget*
const targetRowBegin = target + (height - rowIndex - 1u) * targetStrideElements;
// writing starts at the last pixel of the target row
4829 TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4831 for (
unsigned int x = 0u; x < width; ++x)
4833 ocean_assert(targetPixel >= targetRowBegin);
4834 tPixelFunction(sourcePixel, targetPixel);
4836 sourcePixel += tSourceChannels;
4837 targetPixel -= tTargetChannels;
/**
 * Applies the compile-time binary operator `tOperator` to every pixel of a subset of rows of two source frames and stores the results in a target frame.
 * For each pixel the operator receives pointers to the current pixel in `source0`, in `source1` and in `target`.
 * Both source pointers advance by `tSourceChannels` elements per pixel, the target pointer moves by `tTargetChannels` elements per pixel.
 * The switch on `conversionFlag` contains four row loops which differ only in how the target is addressed:
 * same row / forward, row (height - rowIndex - 1) / forward, same row / backward, row (height - rowIndex - 1) / backward.
 * NOTE(review): the `case` labels are not visible in this listing; confirm which ConversionFlag value selects which loop.
 * @param source0 First source frame, must not be nullptr and must not be identical to `target`
 * @param source1 Second source frame, must not be nullptr and must not be identical to `target`
 * @param target Target frame receiving the results, must not be nullptr
 * @param width Width of the frames in pixels
 * @param height Height of the frames in pixels, used to determine the opposite target row
 * @param source0PaddingElements Number of elements added to `width * tSourceChannels` to form the row stride of `source0`
 * @param source1PaddingElements Number of elements added to `width * tSourceChannels` to form the row stride of `source1`
 * @param targetPaddingElements Number of elements added to `width * tTargetChannels` to form the row stride of `target`
 * @param conversionFlag Selects how the target frame is addressed
 * @param firstRow Index of the first source row to process
 * @param numberRows Number of rows to process, must not be 0, with firstRow + numberRows <= height
 * @tparam tSourceChannels Number of elements per pixel in both source frames, at least 1
 * @tparam tTargetChannels Number of elements per pixel in the target frame, at least 1
 * @tparam tOperator Function invoked once per pixel
 */
4849template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
4850void FrameChannels::applyBivariateOperatorSubset(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows)
4852 static_assert(tSourceChannels >= 1u,
"Invalid source channel number");
4853 static_assert(tTargetChannels >= 1u,
"Invalid target channel number");
4854 static_assert(tOperator,
"Invalid operator function");
4856 ocean_assert(source0 !=
nullptr && source1 !=
nullptr && target !=
nullptr);
4857 ocean_assert((
const void*)(source0) != (
const void*)(target));
4858 ocean_assert((
const void*)(source1) != (
const void*)(target));
4860 ocean_assert(numberRows != 0u);
4861 ocean_assert(firstRow + numberRows <= height);
// Row strides in elements: payload of one row plus the row's padding elements
4863 const unsigned int source0StrideElements = width * tSourceChannels + source0PaddingElements;
4864 const unsigned int source1StrideElements = width * tSourceChannels + source1PaddingElements;
4866 const unsigned int targetWidthElements = width * tTargetChannels;
4868 const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4870 switch (conversionFlag)
// Loop 1: target row index equals the source row index, target pixels are written front to back
4874 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4876 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4877 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4879 TTarget* rowTarget = target + rowIndex * targetStrideElements;
4880 const TTarget*
const rowTargetEnd = rowTarget + targetWidthElements;
4882 while (rowTarget != rowTargetEnd)
4884 ocean_assert(rowTarget < rowTargetEnd);
4886 tOperator(rowSource0, rowSource1, rowTarget);
4888 rowSource0 += tSourceChannels;
4889 rowSource1 += tSourceChannels;
4891 rowTarget += tTargetChannels;
// Loop 2: source row `rowIndex` is written to target row (height - rowIndex - 1), target pixels are written front to back
4900 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4902 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4903 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4905 TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements;
4906 const TTarget*
const rowTargetEnd = rowTarget + targetWidthElements;
4908 while (rowTarget != rowTargetEnd)
4910 ocean_assert(rowTarget < rowTargetEnd);
4912 tOperator(rowSource0, rowSource1, rowTarget);
4914 rowSource0 += tSourceChannels;
4915 rowSource1 += tSourceChannels;
4917 rowTarget += tTargetChannels;
// Loop 3: same target row, but the target pointer starts at the last pixel of the row and moves backwards;
// the end marker therefore lies one pixel in front of the first pixel of the row
4926 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4928 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4929 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4931 TTarget* rowTarget = target + rowIndex * targetStrideElements + targetWidthElements - tTargetChannels;
4932 const TTarget*
const rowTargetEnd = rowTarget - targetWidthElements;
4934 while (rowTarget != rowTargetEnd)
4936 ocean_assert(rowTarget > rowTargetEnd);
4938 tOperator(rowSource0, rowSource1, rowTarget);
4940 rowSource0 += tSourceChannels;
4941 rowSource1 += tSourceChannels;
4943 rowTarget -= tTargetChannels;
// Loop 4: combination of loop 2 and loop 3, target row (height - rowIndex - 1) is written back to front
4952 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4954 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4955 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4957 TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements + targetWidthElements - tTargetChannels;
4958 const TTarget*
const rowTargetEnd = rowTarget - targetWidthElements;
4960 while (rowTarget != rowTargetEnd)
4962 ocean_assert(rowTarget > rowTargetEnd);
4964 tOperator(rowSource0, rowSource1, rowTarget);
4966 rowSource0 += tSourceChannels;
4967 rowSource1 += tSourceChannels;
4969 rowTarget -= tTargetChannels;
// Debug-only failure; presumably reached for a conversion flag not handled above - TODO confirm
4977 ocean_assert(
false &&
"This should never happen!");
4982template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
4983void FrameChannels::applyRowOperatorSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction,
const unsigned int firstRow,
const unsigned int numberRows)
4985 static_assert(tSourceChannels >= 1u,
"Invalid source channel number");
4986 static_assert(tTargetChannels >= 1u,
"Invalid target channel number");
4988 ocean_assert(source !=
nullptr && target !=
nullptr);
4989 ocean_assert((
const void*)source != (
const void*)target);
4991 ocean_assert(width * tSourceChannels <= sourceStrideElements);
4992 ocean_assert(width * tTargetChannels <= targetStrideElements);
4994 ocean_assert(rowOperatorFunction !=
nullptr);
4996 ocean_assert(numberRows != 0u);
4997 ocean_assert(firstRow + numberRows <= height);
4999 for (
unsigned int y = firstRow; y < firstRow + numberRows; ++y)
5001 rowOperatorFunction(source + y * sourceStrideElements, target + y * targetStrideElements, width, height, y, sourceStrideElements, targetStrideElements);
/**
 * Converts `size` pixels with 3 interleaved 8 bit channels to pixels with one 8 bit channel using a weighted sum with 7 bit precision:
 * target = (source[0] * factor0 + source[1] * factor1 + source[2] * factor2 + 64) >> 7.
 * The three factors are read as `unsigned int` values from `channelMultiplicationFactors_128`; they are asserted to be <= 128 each and to sum up to 128.
 * Each template flag must be true exactly if the corresponding factor is non-zero (asserted), a false flag removes the channel from the scalar sum.
 * Full blocks of pixels are handled with SSE (16 pixels) or NEON (8 pixels) if available, the remaining pixels are handled by the scalar loop.
 * NOTE(review): the function signature is not visible in this listing; the body uses `source`, `target`, `size` and `channelMultiplicationFactors_128`.
 * @tparam tUseFactorChannel0 True, if the factor of channel 0 is non-zero
 * @tparam tUseFactorChannel1 True, if the factor of channel 1 is non-zero
 * @tparam tUseFactorChannel2 True, if the factor of channel 2 is non-zero
 */
5005template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
5008 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2,
"Invalid channel factors!");
5010 ocean_assert(channelMultiplicationFactors_128 !=
nullptr);
// The opaque factor pointer is interpreted as an array of (at least) three unsigned int values
5011 const unsigned int* channelFactors_128 =
reinterpret_cast<const unsigned int*
>(channelMultiplicationFactors_128);
5012 ocean_assert(channelFactors_128 !=
nullptr);
5014 const unsigned int factorChannel0_128 = channelFactors_128[0];
5015 const unsigned int factorChannel1_128 = channelFactors_128[1];
5016 const unsigned int factorChannel2_128 = channelFactors_128[2];
// The factors use a denominator of 128, so they must sum up to exactly 128
5018 ocean_assert(factorChannel0_128 <= 128u && factorChannel1_128 <= 128u && factorChannel2_128 <= 128u);
5019 ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 == 128u);
// The compile-time flags must match the run-time factors
5021 ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
5022 ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
5023 ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
5025 ocean_assert(source !=
nullptr && target !=
nullptr && size >= 1);
// End marker for the scalar loop, computed before `target` is advanced by the SIMD loops
5027 const uint8_t*
const targetEnd = target + size;
5029#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// SSE: 16 pixels per iteration, each factor is replicated into all eight 16 bit lanes
5031 constexpr size_t blockSize = 16;
5032 const size_t blocks = size / blockSize;
5034 const __m128i multiplicationFactors0_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel0_128));
5035 const __m128i multiplicationFactors1_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel1_128));
5036 const __m128i multiplicationFactors2_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel2_128));
// NOTE(review): the call converting one block is not visible in this listing; only the pointer advances are
5038 for (
size_t n = 0; n < blocks; ++n)
5042 source += blockSize *
size_t(3);
5043 target += blockSize;
5046#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON: 8 pixels per iteration, each factor is replicated into all eight 8 bit lanes
5048 constexpr size_t blockSize = 8;
5049 const size_t blocks = size / blockSize;
5051 const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
5052 const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
5053 const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
5055 for (
size_t n = 0; n < blocks; ++n)
5057 convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8);
5059 source += blockSize *
size_t(3);
5060 target += blockSize;
// Scalar loop for all pixels not handled above; +64 rounds to nearest before the shift by 7 (division by 128)
// NOTE(review): no advance of `source` is visible in this listing although each iteration reads source[0..2] - confirm that the pointer is advanced by 3 per pixel
5065 while (target != targetEnd)
5067 ocean_assert(target < targetEnd);
5069 const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
5070 const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
5071 const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
5073 *target++ = (uint8_t)((channel0 + channel1 + channel2 + 64u) >> 7u);
/**
 * Converts `size` pixels with 4 interleaved 8 bit channels to pixels with one 8 bit channel using a weighted sum with 7 bit precision:
 * target = (source[0] * factor0 + source[1] * factor1 + source[2] * factor2 + source[3] * factor3 + 64) >> 7.
 * The four factors are read as `unsigned int` values from `channelMultiplicationFactors_128`; they are asserted to be <= 127 each and to sum up to 128.
 * Each template flag must be true exactly if the corresponding factor is non-zero (asserted), a false flag removes the channel from the scalar sum.
 * Full blocks of pixels are handled with SSE (16 pixels) or NEON (8 pixels) if available, the remaining pixels are handled by the scalar loop.
 * NOTE(review): the function signature is not visible in this listing; the body uses `source`, `target`, `size` and `channelMultiplicationFactors_128`.
 * @tparam tUseFactorChannel0 True, if the factor of channel 0 is non-zero
 * @tparam tUseFactorChannel1 True, if the factor of channel 1 is non-zero
 * @tparam tUseFactorChannel2 True, if the factor of channel 2 is non-zero
 * @tparam tUseFactorChannel3 True, if the factor of channel 3 is non-zero
 */
5078template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
5081 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3,
"Invalid channel factors!");
5083 ocean_assert(channelMultiplicationFactors_128 !=
nullptr);
// The opaque factor pointer is interpreted as an array of (at least) four unsigned int values
5084 const unsigned int* channelFactors_128 =
reinterpret_cast<const unsigned int*
>(channelMultiplicationFactors_128);
5085 ocean_assert(channelFactors_128 !=
nullptr);
5087 const unsigned int factorChannel0_128 = channelFactors_128[0];
5088 const unsigned int factorChannel1_128 = channelFactors_128[1];
5089 const unsigned int factorChannel2_128 = channelFactors_128[2];
5090 const unsigned int factorChannel3_128 = channelFactors_128[3];
// Each factor is limited to 127 (not 128 as in the 3-channel variant); presumably because the SSE path packs the factors
// into individual bytes which are interpreted as signed 8 bit values - TODO confirm against the SSE kernel
5092 ocean_assert(factorChannel0_128 <= 127u && factorChannel1_128 <= 127u && factorChannel2_128 <= 127u && factorChannel3_128 <= 127u);
5093 ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 + factorChannel3_128 == 128u);
// The compile-time flags must match the run-time factors
5095 ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
5096 ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
5097 ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
5098 ocean_assert(tUseFactorChannel3 == (factorChannel3_128 != 0u));
5100 ocean_assert(source !=
nullptr && target !=
nullptr && size >= 1);
// End marker for the scalar loop, computed before `target` is advanced by the SIMD loops
5102 const uint8_t*
const targetEnd = target + size;
5104#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// SSE: 16 pixels per iteration; the four factors are packed into the four bytes of a 32 bit value (factor 0 in the lowest byte)
// which is replicated into all four 32 bit lanes
5106 constexpr size_t blockSize = 16;
5107 const size_t blocks = size / blockSize;
5109 const __m128i m128_multiplicationFactors = _mm_set1_epi32(
int(factorChannel0_128 | (factorChannel1_128 << 8u) | (factorChannel2_128 << 16u) | (factorChannel3_128 << 24u)));
// NOTE(review): the call converting one block is not visible in this listing; only the pointer advances are
5111 for (
size_t n = 0; n < blocks; ++n)
5115 source += blockSize *
size_t(4);
5116 target += blockSize;
5119#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON: 8 pixels per iteration, each factor is replicated into all eight 8 bit lanes
5121 constexpr size_t blockSize = 8;
5122 const size_t blocks = size / blockSize;
5124 const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
5125 const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
5126 const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
5127 const uint8x8_t factorChannel3_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel3_128);
5129 for (
size_t n = 0; n < blocks; ++n)
5131 convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2, tUseFactorChannel3>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8, factorChannel3_128_u_8x8);
5133 source += blockSize *
size_t(4);
5134 target += blockSize;
// Scalar loop for all pixels not handled above; +64 rounds to nearest before the shift by 7 (division by 128)
// NOTE(review): no advance of `source` is visible in this listing although each iteration reads source[0..3] - confirm that the pointer is advanced by 4 per pixel
5139 while (target != targetEnd)
5141 ocean_assert(target < targetEnd);
5143 const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
5144 const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
5145 const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
5146 const unsigned int channel3 = tUseFactorChannel3 ? (source[3] * factorChannel3_128) : 0u;
5148 *target++ = (uint8_t)((channel0 + channel1 + channel2 + channel3 + 64u) >> 7u);
/**
 * Converts a subset of rows of an 8 bit per channel frame from premultiplied alpha to straight alpha, in place.
 * For every pixel with a non-zero alpha value, each non-alpha channel is replaced by min((channel * 255 + alpha / 2) / alpha, 255).
 * Pixels with an alpha value of zero and the alpha channel itself are left unchanged.
 * NOTE(review): the function signature is not visible in this listing; the body uses `frame`, `width`, `framePaddingElements`, `firstRow` and `numberRows`.
 * @tparam tChannels Number of channels of the frame including the alpha channel, at least 2
 * @tparam tAlphaChannelIndex Index of the alpha channel, with range [0, tChannels - 1]
 */
5153template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5156 static_assert(tChannels >= 2u,
"Invalid channel number!");
5157 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5159 ocean_assert(frame !=
nullptr);
5160 ocean_assert(width >= 1u);
5162 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
// Start at the first row of the subset
5164 uint8_t* frameRow = frame + frameStrideElements * firstRow;
5166 for (
unsigned int y = 0u; y < numberRows; ++y)
5168 for (
unsigned int x = 0u; x < width; ++x)
// A zero alpha value would result in a division by zero, such pixels are skipped
5170 if (frameRow[tAlphaChannelIndex])
// Half of the alpha value, added to the numerator to round the division to nearest
5172 const uint8_t alpha_2 = frameRow[tAlphaChannelIndex] / 2u;
5174 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5176 if (channelIndex != tAlphaChannelIndex)
// The quotient exceeds 255 whenever the channel value is larger than the alpha value, so it is clamped before the cast
5178 frameRow[channelIndex] = uint8_t(std::min((frameRow[channelIndex] * 255u + alpha_2) / frameRow[tAlphaChannelIndex], 255u));
// Next pixel
5183 frameRow += tChannels;
// Skip the padding elements to reach the start of the next row
5186 frameRow += framePaddingElements;
/**
 * Converts a subset of rows of an 8 bit per channel frame from premultiplied alpha to straight alpha, reading from a source frame and writing to a target frame.
 * For every pixel with a non-zero alpha value, each non-alpha channel is written as min((channel * 255 + alpha / 2) / alpha, 255).
 * The alpha channel is copied, and pixels with an alpha value of zero are copied unchanged.
 * NOTE(review): the function signature is not visible in this listing; the body uses `source`, `target`, `width`, `sourcePaddingElements`, `targetPaddingElements`, `firstRow` and `numberRows`.
 * @tparam tChannels Number of channels of both frames including the alpha channel, at least 2
 * @tparam tAlphaChannelIndex Index of the alpha channel, with range [0, tChannels - 1]
 */
5190template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5193 static_assert(tChannels >= 2u,
"Invalid channel number!");
5194 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5196 ocean_assert(source !=
nullptr && target !=
nullptr);
5197 ocean_assert(width >= 1u);
5199 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5200 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
// Start at the first row of the subset in both frames
5202 const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5203 uint8_t* targetRow = target + targetStrideElements * firstRow;
5205 for (
unsigned int y = 0u; y < numberRows; ++y)
5207 for (
unsigned int x = 0u; x < width; ++x)
// A zero alpha value would result in a division by zero, such pixels are copied unchanged further below
5209 if (sourceRow[tAlphaChannelIndex])
// Half of the alpha value, added to the numerator to round the division to nearest
5211 const uint8_t alpha_2 = sourceRow[tAlphaChannelIndex] / 2u;
5213 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5215 if (channelIndex != tAlphaChannelIndex)
// The quotient exceeds 255 whenever the channel value is larger than the alpha value, so it must be clamped
// with std::min (std::max would force every result to at least 255 and break the cast to uint8_t)
5217 targetRow[channelIndex] = uint8_t(std::min((sourceRow[channelIndex] * 255u + alpha_2) / sourceRow[tAlphaChannelIndex], 255u));
// Alpha channel: copied as is
5221 targetRow[channelIndex] = sourceRow[channelIndex];
// Pixel with zero alpha: all channels are copied as they are
5227 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5229 targetRow[channelIndex] = sourceRow[channelIndex];
// Next pixel
5233 sourceRow += tChannels;
5234 targetRow += tChannels;
// Skip the padding elements to reach the start of the next row
5237 sourceRow += sourcePaddingElements;
5238 targetRow += targetPaddingElements;
/**
 * Converts a subset of rows of an 8 bit per channel frame from straight alpha to premultiplied alpha, in place.
 * Each non-alpha channel is replaced by (channel * alpha + 127) / 255, the alpha channel itself is left unchanged.
 * NOTE(review): the function signature is not visible in this listing; the body uses `frame`, `width`, `framePaddingElements`, `firstRow` and `numberRows`.
 * @tparam tChannels Number of channels of the frame including the alpha channel, at least 2
 * @tparam tAlphaChannelIndex Index of the alpha channel, with range [0, tChannels - 1]
 */
5242template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5245 static_assert(tChannels >= 2u,
"Invalid channel number!");
5246 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5248 ocean_assert(frame !=
nullptr);
5249 ocean_assert(width >= 1u);
5251 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
// Start at the first row of the subset
5253 uint8_t* frameRow = frame + frameStrideElements * firstRow;
5255 for (
unsigned int y = 0u; y < numberRows; ++y)
5257 for (
unsigned int x = 0u; x < width; ++x)
5259 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5261 if (channelIndex != tAlphaChannelIndex)
// +127 rounds the division by 255 to nearest; the result never exceeds 255 as both factors are at most 255
5263 frameRow[channelIndex] = (frameRow[channelIndex] * frameRow[tAlphaChannelIndex] + 127u) / 255u;
// Next pixel
5267 frameRow += tChannels;
// Skip the padding elements to reach the start of the next row
5270 frameRow += framePaddingElements;
/**
 * Converts a subset of rows of an 8 bit per channel frame from straight alpha to premultiplied alpha, reading from a source frame and writing to a target frame.
 * Each non-alpha channel is written as (channel * alpha + 127) / 255; the remaining assignment copies the source value.
 * NOTE(review): the function signature is not visible in this listing; the body uses `source`, `target`, `width`, `sourcePaddingElements`, `targetPaddingElements`, `firstRow` and `numberRows`.
 * @tparam tChannels Number of channels of both frames including the alpha channel, at least 2
 * @tparam tAlphaChannelIndex Index of the alpha channel, with range [0, tChannels - 1]
 */
5274template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5277 static_assert(tChannels >= 2u,
"Invalid channel number!");
5278 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5280 ocean_assert(source !=
nullptr && target !=
nullptr);
5281 ocean_assert(width >= 1u);
5283 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5284 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
// Start at the first row of the subset in both frames
5286 const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5287 uint8_t* targetRow = target + targetStrideElements * firstRow;
5289 for (
unsigned int y = 0u; y < numberRows; ++y)
5291 for (
unsigned int x = 0u; x < width; ++x)
5293 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5295 if (channelIndex != tAlphaChannelIndex)
// +127 rounds the division by 255 to nearest; the result never exceeds 255 as both factors are at most 255
5297 targetRow[channelIndex] = (sourceRow[channelIndex] * sourceRow[tAlphaChannelIndex] + 127u) / 255u;
// Presumably the else branch (the `else` is not visible in this listing): the alpha channel is copied as is
5301 targetRow[channelIndex] = sourceRow[channelIndex];
// Next pixel
5305 sourceRow += tChannels;
5306 targetRow += tChannels;
// Skip the padding elements to reach the start of the next row
5309 sourceRow += sourcePaddingElements;
5310 targetRow += targetPaddingElements;
5314#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
/**
 * SSE kernel converting 16 pixels with 3 channels to 16 pixels with 1 channel with 7 bit precision:
 * result = (channel0 * factor0 + channel1 * factor1 + channel2 * factor2 + 64) >> 7.
 * Reads 48 bytes from `source` (three unaligned 16 byte loads) and writes 16 bytes to `target` (one unaligned 16 byte store).
 * NOTE(review): the function signature, the de-interleaving of the loaded data into channel0/1/2_u_8x16 and the definition
 * of the channelN_low_u_8x16 values are not visible in this listing.
 */
5318 ocean_assert(source !=
nullptr && target !=
nullptr);
// 0x0040 (= 64) in each of the eight 16 bit lanes, the rounding offset applied before the shift by 7
5335 const __m128i constant64_u_16x8 = _mm_set1_epi32(0x00400040);
5337 const __m128i sourceA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5338 const __m128i sourceB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5339 const __m128i sourceC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5341 __m128i channel0_u_8x16;
5342 __m128i channel1_u_8x16;
5343 __m128i channel2_u_8x16;
// Shifting each 16 bit lane right by 8 extracts the upper byte of the lane as a 16 bit value
5352 const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5353 const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5354 const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
// 16 bit products of the channel values and the corresponding factors
5358 const __m128i result0_low_u_8x16 = _mm_mullo_epi16(channel0_low_u_8x16, multiplicationFactors0_128_u_16x8);
5359 const __m128i result0_high_u_8x16 = _mm_mullo_epi16(channel0_high_u_8x16, multiplicationFactors0_128_u_16x8);
5361 const __m128i result1_low_u_8x16 = _mm_mullo_epi16(channel1_low_u_8x16, multiplicationFactors1_128_u_16x8);
5362 const __m128i result1_high_u_8x16 = _mm_mullo_epi16(channel1_high_u_8x16, multiplicationFactors1_128_u_16x8);
5364 const __m128i result2_low_u_8x16 = _mm_mullo_epi16(channel2_low_u_8x16, multiplicationFactors2_128_u_16x8);
5365 const __m128i result2_high_u_8x16 = _mm_mullo_epi16(channel2_high_u_8x16, multiplicationFactors2_128_u_16x8);
// Saturating unsigned sum of the three products and the rounding offset
5368 const __m128i result128_low_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_low_u_8x16, result1_low_u_8x16), _mm_adds_epu16(result2_low_u_8x16, constant64_u_16x8));
5369 const __m128i result128_high_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_high_u_8x16, result1_high_u_8x16), _mm_adds_epu16(result2_high_u_8x16, constant64_u_16x8));
// Normalization: division by 128
5372 const __m128i result_low_u_8x16 = _mm_srli_epi16(result128_low_u_8x16, 7);
5373 const __m128i result_high_u_8x16 = _mm_srli_epi16(result128_high_u_8x16, 7);
// Re-combine: the 'low' results stay in the lower byte, the 'high' results move to the upper byte of each 16 bit lane
5376 const __m128i result_u_8x16 = _mm_or_si128(result_low_u_8x16, _mm_slli_epi16(result_high_u_8x16, 8));
5379 _mm_storeu_si128((__m128i*)target, result_u_8x16);
/**
 * SSE kernel converting 16 pixels with 3 channels to 16 pixels with 3 channels by a 3x3 matrix multiplication with 16 bit arithmetic.
 * Reads 48 bytes from `source` and writes 48 bytes to `target` (three unaligned 16 byte loads/stores each).
 * Factor naming: factorChannel<T><S> is the factor applied to source channel S when computing target channel T.
 * NOTE(review): the de-interleaving of the loaded data, the definition of the channelN_low_u_8x16 values, the lines between the
 * multiplication and the clamping (presumably normalization and bias) and the re-interleaving are not visible in this listing.
 * @param source The 16 source pixels (48 bytes), must not be nullptr
 * @param target The 16 target pixels (48 bytes), must not be nullptr
 * @param factorChannel00_128_s_16x8 Factor for source channel 0 and target channel 0
 * @param factorChannel10_128_s_16x8 Factor for source channel 0 and target channel 1
 * @param factorChannel20_128_s_16x8 Factor for source channel 0 and target channel 2
 * @param factorChannel01_128_s_16x8 Factor for source channel 1 and target channel 0
 * @param factorChannel11_128_s_16x8 Factor for source channel 1 and target channel 1
 * @param factorChannel21_128_s_16x8 Factor for source channel 1 and target channel 2
 * @param factorChannel02_128_s_16x8 Factor for source channel 2 and target channel 0
 * @param factorChannel12_128_s_16x8 Factor for source channel 2 and target channel 1
 * @param factorChannel22_128_s_16x8 Factor for source channel 2 and target channel 2
 * @param biasChannel0_s_16x8 Bias for channel 0; its use is not visible in this listing
 * @param biasChannel1_s_16x8 Bias for channel 1; its use is not visible in this listing
 * @param biasChannel2_s_16x8 Bias for channel 2; its use is not visible in this listing
 */
5382OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_128_s_16x8,
const __m128i& factorChannel10_128_s_16x8,
const __m128i& factorChannel20_128_s_16x8,
const __m128i& factorChannel01_128_s_16x8,
const __m128i& factorChannel11_128_s_16x8,
const __m128i& factorChannel21_128_s_16x8,
const __m128i& factorChannel02_128_s_16x8,
const __m128i& factorChannel12_128_s_16x8,
const __m128i& factorChannel22_128_s_16x8,
const __m128i& biasChannel0_s_16x8,
const __m128i& biasChannel1_s_16x8,
const __m128i& biasChannel2_s_16x8)
5384 ocean_assert(source !=
nullptr && target !=
nullptr);
5403 const __m128i sourceA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5404 const __m128i sourceB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5405 const __m128i sourceC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5407 __m128i channel0_u_8x16;
5408 __m128i channel1_u_8x16;
5409 __m128i channel2_u_8x16;
// Shifting each 16 bit lane right by 8 extracts the upper byte of the lane as a 16 bit value
5418 const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5419 const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5420 const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
// Matrix multiplication for the 'low' values: resultT = channel0 * factorT0 + channel1 * factorT1 + channel2 * factorT2
5424 __m128i result0_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel02_128_s_16x8));
5425 __m128i result1_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel12_128_s_16x8));
5426 __m128i result2_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel22_128_s_16x8));
// The same matrix multiplication for the 'high' values
5428 __m128i result0_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel02_128_s_16x8));
5429 __m128i result1_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel12_128_s_16x8));
5430 __m128i result2_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel22_128_s_16x8));
// Clamping of the signed 16 bit results to the value range [0, 255]
5444 const __m128i constant255_s_16x8 = _mm_set1_epi16(255);
5446 result0_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5447 result1_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5448 result2_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5450 result0_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5451 result1_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5452 result2_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
// Re-combine: the 'low' results stay in the lower byte, the 'high' results move to the upper byte of each 16 bit lane
5455 const __m128i result0_u_8x16 = _mm_or_si128(result0_low_u_8x16, _mm_slli_epi16(result0_high_u_8x16, 8));
5456 const __m128i result1_u_8x16 = _mm_or_si128(result1_low_u_8x16, _mm_slli_epi16(result1_high_u_8x16, 8));
5457 const __m128i result2_u_8x16 = _mm_or_si128(result2_low_u_8x16, _mm_slli_epi16(result2_high_u_8x16, 8));
5459 __m128i resultA_u_8x16;
5460 __m128i resultB_u_8x16;
5461 __m128i resultC_u_8x16;
5465 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5466 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5467 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
/**
 * SSE kernel converting 16 pixels with 3 channels to 16 pixels with 3 channels, using 32 bit intermediate results.
 * Reads 48 bytes from `source` and writes 48 bytes to `target` (three unaligned 16 byte loads/stores each).
 * NOTE(review): the de-interleaving of the loaded data, the computation of the 32 bit intermediate results (including the use of
 * the factors and bias values) and the re-interleaving are not visible in this listing; only the packing of the results is.
 * @param source The 16 source pixels (48 bytes), must not be nullptr
 * @param target The 16 target pixels (48 bytes), must not be nullptr
 * @param factorChannel00_1024_s_16x8 Multiplication factor, 16 bit lanes; presumably for source channel 0 and target channel 0 - TODO confirm
 * @param factorChannel10_1024_s_16x8 Multiplication factor, see above
 * @param factorChannel20_1024_s_16x8 Multiplication factor, see above
 * @param factorChannel01_1024_s_16x8 Multiplication factor, see above
 * @param factorChannel11_1024_s_16x8 Multiplication factor, see above
 * @param factorChannel21_1024_s_16x8 Multiplication factor, see above
 * @param factorChannel02_1024_s_16x8 Multiplication factor, see above
 * @param factorChannel12_1024_s_16x8 Multiplication factor, see above
 * @param factorChannel22_1024_s_16x8 Multiplication factor, see above
 * @param biasChannel0_1024_s_32x4 Bias for channel 0, 32 bit lanes; its use is not visible in this listing
 * @param biasChannel1_1024_s_32x4 Bias for channel 1, 32 bit lanes; its use is not visible in this listing
 * @param biasChannel2_1024_s_32x4 Bias for channel 2, 32 bit lanes; its use is not visible in this listing
 */
5470OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_1024_s_16x8,
const __m128i& factorChannel10_1024_s_16x8,
const __m128i& factorChannel20_1024_s_16x8,
const __m128i& factorChannel01_1024_s_16x8,
const __m128i& factorChannel11_1024_s_16x8,
const __m128i& factorChannel21_1024_s_16x8,
const __m128i& factorChannel02_1024_s_16x8,
const __m128i& factorChannel12_1024_s_16x8,
const __m128i& factorChannel22_1024_s_16x8,
const __m128i& biasChannel0_1024_s_32x4,
const __m128i& biasChannel1_1024_s_32x4,
const __m128i& biasChannel2_1024_s_32x4)
5472 ocean_assert(source !=
nullptr && target !=
nullptr);
5492 const __m128i sourceA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5493 const __m128i sourceB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5494 const __m128i sourceC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5496 __m128i channel0_u_8x16;
5497 __m128i channel1_u_8x16;
5498 __m128i channel2_u_8x16;
// Shifting each 16 bit lane right by 8 extracts the upper byte of the lane as a 16 bit value
5508 const __m128i channel0_high_u_16x8 = _mm_srli_epi16(channel0_u_8x16, 8);
5509 const __m128i channel1_high_u_16x8 = _mm_srli_epi16(channel1_u_8x16, 8);
5510 const __m128i channel2_high_u_16x8 = _mm_srli_epi16(channel2_u_8x16, 8);
// 32 bit intermediate results for target channel 0 (their computation is not visible in this listing)
5515 __m128i result0_low_A_s_32x4;
5516 __m128i result0_low_B_s_32x4;
5517 __m128i result0_high_A_s_32x4;
5518 __m128i result0_high_B_s_32x4;
// 32 bit intermediate results for target channel 1
5535 __m128i result1_low_A_s_32x4;
5536 __m128i result1_low_B_s_32x4;
5537 __m128i result1_high_A_s_32x4;
5538 __m128i result1_high_B_s_32x4;
// 32 bit intermediate results for target channel 2
5555 __m128i result2_low_A_s_32x4;
5556 __m128i result2_low_B_s_32x4;
5557 __m128i result2_high_A_s_32x4;
5558 __m128i result2_high_B_s_32x4;
// Merge of two 32 bit vectors into one 16 bit vector: the lower 16 bits of each 'low' lane stay in place,
// the lower 16 bits of each 'high' lane are shifted into the upper half of the 32 bit lane
5578 const __m128i mask_0000FFFF_32x4 = _mm_set1_epi32(0x0000FFFF);
5580 __m128i result0_A_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_A_s_32x4, 16));
5581 __m128i result0_B_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_B_s_32x4, 16));
5583 __m128i result1_A_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_A_s_32x4, 16));
5584 __m128i result1_B_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_B_s_32x4, 16));
5586 __m128i result2_A_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_A_s_32x4, 16));
5587 __m128i result2_B_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_B_s_32x4, 16));
// Signed 16 bit -> unsigned 8 bit with saturation, i.e., the values are clamped to [0, 255]
5592 const __m128i result0_u_8x16 = _mm_packus_epi16(result0_A_s_16x8, result0_B_s_16x8);
5593 const __m128i result1_u_8x16 = _mm_packus_epi16(result1_A_s_16x8, result1_B_s_16x8);
5594 const __m128i result2_u_8x16 = _mm_packus_epi16(result2_A_s_16x8, result2_B_s_16x8);
5596 __m128i resultA_u_8x16;
5597 __m128i resultB_u_8x16;
5598 __m128i resultC_u_8x16;
5602 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5603 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5604 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
/**
 * SSE kernel converting 16 pixels with 3 channels to 16 pixels with 3 channels by a 3x3 matrix multiplication with 16 bit arithmetic.
 * The bias values are subtracted from the source channels before the multiplication.
 * Reads 48 bytes from `source` and writes 48 bytes to `target` (three unaligned 16 byte loads/stores each).
 * Factor naming: factorChannel<T><S> is the factor applied to source channel S when computing target channel T.
 * NOTE(review): the de-interleaving of the loaded data, the lines between the multiplication and the packing (presumably the
 * normalization) and the re-interleaving are not visible in this listing.
 * @param source The 16 source pixels (48 bytes), must not be nullptr
 * @param target The 16 target pixels (48 bytes), must not be nullptr
 * @param factorChannel00_64_s_16x8 Factor for source channel 0 and target channel 0
 * @param factorChannel10_64_s_16x8 Factor for source channel 0 and target channel 1
 * @param factorChannel20_64_s_16x8 Factor for source channel 0 and target channel 2
 * @param factorChannel01_64_s_16x8 Factor for source channel 1 and target channel 0
 * @param factorChannel11_64_s_16x8 Factor for source channel 1 and target channel 1
 * @param factorChannel21_64_s_16x8 Factor for source channel 1 and target channel 2
 * @param factorChannel02_64_s_16x8 Factor for source channel 2 and target channel 0
 * @param factorChannel12_64_s_16x8 Factor for source channel 2 and target channel 1
 * @param factorChannel22_64_s_16x8 Factor for source channel 2 and target channel 2
 * @param biasChannel0_s_16x8 Bias subtracted from source channel 0
 * @param biasChannel1_s_16x8 Bias subtracted from source channel 1
 * @param biasChannel2_s_16x8 Bias subtracted from source channel 2
 */
5607OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_64_s_16x8,
const __m128i& factorChannel10_64_s_16x8,
const __m128i& factorChannel20_64_s_16x8,
const __m128i& factorChannel01_64_s_16x8,
const __m128i& factorChannel11_64_s_16x8,
const __m128i& factorChannel21_64_s_16x8,
const __m128i& factorChannel02_64_s_16x8,
const __m128i& factorChannel12_64_s_16x8,
const __m128i& factorChannel22_64_s_16x8,
const __m128i& biasChannel0_s_16x8,
const __m128i& biasChannel1_s_16x8,
const __m128i& biasChannel2_s_16x8)
5609 ocean_assert(source !=
nullptr && target !=
nullptr);
5628 const __m128i sourceA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5629 const __m128i sourceB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5630 const __m128i sourceC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5632 __m128i channel0_u_8x16;
5633 __m128i channel1_u_8x16;
5634 __m128i channel2_u_8x16;
// Zero-extension of the lower eight 8 bit values to 16 bit, followed by the subtraction of the bias
5639 const __m128i channel0_low_s_16x8 = _mm_sub_epi16(_mm_unpacklo_epi8(channel0_u_8x16, _mm_setzero_si128()), biasChannel0_s_16x8);
5640 const __m128i channel1_low_s_16x8 = _mm_sub_epi16(_mm_unpacklo_epi8(channel1_u_8x16, _mm_setzero_si128()), biasChannel1_s_16x8);
5641 const __m128i channel2_low_s_16x8 = _mm_sub_epi16(_mm_unpacklo_epi8(channel2_u_8x16, _mm_setzero_si128()), biasChannel2_s_16x8);
// The same for the upper eight 8 bit values
5643 const __m128i channel0_high_s_16x8 = _mm_sub_epi16(_mm_unpackhi_epi8(channel0_u_8x16, _mm_setzero_si128()), biasChannel0_s_16x8);
5644 const __m128i channel1_high_s_16x8 = _mm_sub_epi16(_mm_unpackhi_epi8(channel1_u_8x16, _mm_setzero_si128()), biasChannel1_s_16x8);
5645 const __m128i channel2_high_s_16x8 = _mm_sub_epi16(_mm_unpackhi_epi8(channel2_u_8x16, _mm_setzero_si128()), biasChannel2_s_16x8);
// Matrix multiplication for the lower eight pixels: resultT = channel0 * factorT0 + channel1 * factorT1 + channel2 * factorT2
5649 __m128i result0_low_s_16x8 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_s_16x8, factorChannel00_64_s_16x8), _mm_mullo_epi16(channel1_low_s_16x8, factorChannel01_64_s_16x8)), _mm_mullo_epi16(channel2_low_s_16x8, factorChannel02_64_s_16x8));
5650 __m128i result1_low_s_16x8 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_s_16x8, factorChannel10_64_s_16x8), _mm_mullo_epi16(channel1_low_s_16x8, factorChannel11_64_s_16x8)), _mm_mullo_epi16(channel2_low_s_16x8, factorChannel12_64_s_16x8));
5651 __m128i result2_low_s_16x8 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_s_16x8, factorChannel20_64_s_16x8), _mm_mullo_epi16(channel1_low_s_16x8, factorChannel21_64_s_16x8)), _mm_mullo_epi16(channel2_low_s_16x8, factorChannel22_64_s_16x8));
// The same matrix multiplication for the upper eight pixels
5653 __m128i result0_high_s_16x8 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_s_16x8, factorChannel00_64_s_16x8), _mm_mullo_epi16(channel1_high_s_16x8, factorChannel01_64_s_16x8)), _mm_mullo_epi16(channel2_high_s_16x8, factorChannel02_64_s_16x8));
5654 __m128i result1_high_s_16x8 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_s_16x8, factorChannel10_64_s_16x8), _mm_mullo_epi16(channel1_high_s_16x8, factorChannel11_64_s_16x8)), _mm_mullo_epi16(channel2_high_s_16x8, factorChannel12_64_s_16x8));
5655 __m128i result2_high_s_16x8 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_s_16x8, factorChannel20_64_s_16x8), _mm_mullo_epi16(channel1_high_s_16x8, factorChannel21_64_s_16x8)), _mm_mullo_epi16(channel2_high_s_16x8, factorChannel22_64_s_16x8));
// Signed 16 bit -> unsigned 8 bit with saturation, i.e., the values are clamped to [0, 255];
// packing (low, high) restores the original pixel order of the unpacklo/unpackhi split
5668 const __m128i result0_u_8x16 = _mm_packus_epi16(result0_low_s_16x8, result0_high_s_16x8);
5669 const __m128i result1_u_8x16 = _mm_packus_epi16(result1_low_s_16x8, result1_high_s_16x8);
5670 const __m128i result2_u_8x16 = _mm_packus_epi16(result2_low_s_16x8, result2_high_s_16x8);
5672 __m128i resultA_u_8x16;
5673 __m128i resultB_u_8x16;
5674 __m128i resultC_u_8x16;
5678 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5679 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5680 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
// Converts one block of interleaved 3-channel 8-bit data into interleaved 4-channel 8-bit data with SSE.
// The function reads 3 * 16 bytes from 'source' and writes 4 * 16 bytes to 'target'.
// Each of the first three target channels is a linear combination of the three (bias-corrected) source channels,
// the fourth target channel is taken unchanged from 'channelValue3_u_8x16'.
// The factor parameters are named factorChannel<target><source>; the '_64' suffix presumably means that the
// factors are pre-multiplied by 64 (6 bit precision) - TODO confirm against the declaration's documentation.
5683OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_64_s_16x8,
const __m128i& factorChannel10_64_s_16x8,
const __m128i& factorChannel20_64_s_16x8,
const __m128i& factorChannel01_64_s_16x8,
const __m128i& factorChannel11_64_s_16x8,
const __m128i& factorChannel21_64_s_16x8,
const __m128i& factorChannel02_64_s_16x8,
const __m128i& factorChannel12_64_s_16x8,
const __m128i& factorChannel22_64_s_16x8,
const __m128i& biasChannel0_s_16x8,
const __m128i& biasChannel1_s_16x8,
const __m128i& biasChannel2_s_16x8,
const __m128i& channelValue3_u_8x16)
5685 ocean_assert(source !=
nullptr && target !=
nullptr);
// unaligned load of the 48 source bytes (three 128 bit registers)
5700 const __m128i sourceA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5701 const __m128i sourceB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5702 const __m128i sourceC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
// NOTE(review): the three registers below are declared without initializer and no assignment is visible in this excerpt;
// presumably a de-interleaving step (one register per channel) sits between the declaration and the first use - confirm against the full file
5704 __m128i channel0_u_8x16;
5705 __m128i channel1_u_8x16;
5706 __m128i channel2_u_8x16;
// zero-extending the lower 8 bytes of each channel to 16 bit lanes and subtracting the per-channel bias
5711 const __m128i channel0_low_s_16x8 = _mm_sub_epi16(_mm_unpacklo_epi8(channel0_u_8x16, _mm_setzero_si128()), biasChannel0_s_16x8);
5712 const __m128i channel1_low_s_16x8 = _mm_sub_epi16(_mm_unpacklo_epi8(channel1_u_8x16, _mm_setzero_si128()), biasChannel1_s_16x8);
5713 const __m128i channel2_low_s_16x8 = _mm_sub_epi16(_mm_unpacklo_epi8(channel2_u_8x16, _mm_setzero_si128()), biasChannel2_s_16x8);
// the same for the upper 8 bytes of each channel
5715 const __m128i channel0_high_s_16x8 = _mm_sub_epi16(_mm_unpackhi_epi8(channel0_u_8x16, _mm_setzero_si128()), biasChannel0_s_16x8);
5716 const __m128i channel1_high_s_16x8 = _mm_sub_epi16(_mm_unpackhi_epi8(channel1_u_8x16, _mm_setzero_si128()), biasChannel1_s_16x8);
5717 const __m128i channel2_high_s_16x8 = _mm_sub_epi16(_mm_unpackhi_epi8(channel2_u_8x16, _mm_setzero_si128()), biasChannel2_s_16x8);
// result<i> = channel0 * factor<i>0 + channel1 * factor<i>1 + channel2 * factor<i>2, for the lower 8 values;
// _mm_mullo_epi16 keeps the lower 16 bits of each product and _mm_add_epi16 does not saturate, so everything must fit into 16 bit
5721 __m128i result0_low_s_16x8 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_s_16x8, factorChannel00_64_s_16x8), _mm_mullo_epi16(channel1_low_s_16x8, factorChannel01_64_s_16x8)), _mm_mullo_epi16(channel2_low_s_16x8, factorChannel02_64_s_16x8));
5722 __m128i result1_low_s_16x8 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_s_16x8, factorChannel10_64_s_16x8), _mm_mullo_epi16(channel1_low_s_16x8, factorChannel11_64_s_16x8)), _mm_mullo_epi16(channel2_low_s_16x8, factorChannel12_64_s_16x8));
5723 __m128i result2_low_s_16x8 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_s_16x8, factorChannel20_64_s_16x8), _mm_mullo_epi16(channel1_low_s_16x8, factorChannel21_64_s_16x8)), _mm_mullo_epi16(channel2_low_s_16x8, factorChannel22_64_s_16x8));
// the same linear combination for the upper 8 values
5725 __m128i result0_high_s_16x8 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_s_16x8, factorChannel00_64_s_16x8), _mm_mullo_epi16(channel1_high_s_16x8, factorChannel01_64_s_16x8)), _mm_mullo_epi16(channel2_high_s_16x8, factorChannel02_64_s_16x8));
5726 __m128i result1_high_s_16x8 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_s_16x8, factorChannel10_64_s_16x8), _mm_mullo_epi16(channel1_high_s_16x8, factorChannel11_64_s_16x8)), _mm_mullo_epi16(channel2_high_s_16x8, factorChannel12_64_s_16x8));
5727 __m128i result2_high_s_16x8 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_s_16x8, factorChannel20_64_s_16x8), _mm_mullo_epi16(channel1_high_s_16x8, factorChannel21_64_s_16x8)), _mm_mullo_epi16(channel2_high_s_16x8, factorChannel22_64_s_16x8));
// NOTE(review): no normalization of the six (non-const) result registers is visible in this excerpt;
// presumably they are divided by 64 in lines not shown here before they are packed - confirm against the full file
// packing the signed 16 bit values to unsigned 8 bit values with saturation to [0, 255]
5740 const __m128i result0_u_8x16 = _mm_packus_epi16(result0_low_s_16x8, result0_high_s_16x8);
5741 const __m128i result1_u_8x16 = _mm_packus_epi16(result1_low_s_16x8, result1_high_s_16x8);
5742 const __m128i result2_u_8x16 = _mm_packus_epi16(result2_low_s_16x8, result2_high_s_16x8);
// interleaving bytes of channel 0 with channel 1, and of channel 2 with the constant fourth channel
5746 const __m128i result01_low_u_8x16 = _mm_unpacklo_epi8(result0_u_8x16, result1_u_8x16);
5747 const __m128i result01_high_u_8x16 = _mm_unpackhi_epi8(result0_u_8x16, result1_u_8x16);
5748 const __m128i result23_low_u_8x16 = _mm_unpacklo_epi8(result2_u_8x16, channelValue3_u_8x16);
5749 const __m128i result23_high_u_8x16 = _mm_unpackhi_epi8(result2_u_8x16, channelValue3_u_8x16);
// interleaving the 16 bit pairs (c0 c1) with (c2 c3), each register then holds four complete 4-channel elements
5751 const __m128i resultA_u_8x16 = _mm_unpacklo_epi16(result01_low_u_8x16, result23_low_u_8x16);
5752 const __m128i resultB_u_8x16 = _mm_unpackhi_epi16(result01_low_u_8x16, result23_low_u_8x16);
5753 const __m128i resultC_u_8x16 = _mm_unpacklo_epi16(result01_high_u_8x16, result23_high_u_8x16);
5754 const __m128i resultD_u_8x16 = _mm_unpackhi_epi16(result01_high_u_8x16, result23_high_u_8x16);
// unaligned store of the 64 target bytes
5757 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5758 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5759 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
5760 _mm_storeu_si128((__m128i*)target + 3, resultD_u_8x16);
// Converts one block of interleaved 4-channel 8-bit data into interleaved 3-channel 8-bit data with SSE.
// The function reads 4 * 16 bytes from 'source' and writes 3 * 16 bytes to 'target'.
// The factor parameters are named factorChannel<target><source>; the '_128' suffix presumably means that the
// factors are pre-multiplied by 128 (7 bit precision) - TODO confirm against the declaration's documentation.
// NOTE(review): the factor parameters are not referenced in the statements visible in this excerpt;
// presumably they are used in the accumulation step which is not shown here.
5763OCEAN_FORCE_INLINE
void FrameChannels::convert4ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_128_s_16x8,
const __m128i& factorChannel10_128_s_16x8,
const __m128i& factorChannel20_128_s_16x8,
const __m128i& factorChannel01_128_s_16x8,
const __m128i& factorChannel11_128_s_16x8,
const __m128i& factorChannel21_128_s_16x8,
const __m128i& factorChannel02_128_s_16x8,
const __m128i& factorChannel12_128_s_16x8,
const __m128i& factorChannel22_128_s_16x8,
const __m128i& factorChannel03_128_s_16x8,
const __m128i& factorChannel13_128_s_16x8,
const __m128i& factorChannel23_128_s_16x8,
const __m128i& biasChannel0_s_16x8,
const __m128i& biasChannel1_s_16x8,
const __m128i& biasChannel2_s_16x8)
5765 ocean_assert(source !=
nullptr && target !=
nullptr);
// unaligned load of the 64 source bytes (four 128 bit registers)
5773 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5774 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5775 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5776 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 3);
// shuffle masks extracting every fourth byte (starting at byte 0, 1, 2, or 3) into zero-extended 16 bit lanes,
// a mask byte of 0xFF produces a zero byte in _mm_shuffle_epi8;
// assumes SSE::set128i(high64, low64) takes the upper 64 bits first, so that the extracted values end up in the lower four lanes - TODO confirm
5782 const __m128i shuffle0 =
SSE::set128i(0xFFFFFFFFFFFFFFFFull, 0xFF0cFF08FF04FF00ull);
5783 const __m128i shuffle1 =
SSE::set128i(0xFFFFFFFFFFFFFFFFull, 0xFF0dFF09FF05FF01ull);
5784 const __m128i shuffle2 =
SSE::set128i(0xFFFFFFFFFFFFFFFFull, 0xFF0eFF0aFF06FF02ull);
5785 const __m128i shuffle3 =
SSE::set128i(0xFFFFFFFFFFFFFFFFull, 0xFF0fFF0bFF07FF03ull);
// extracting channel 0 from each of the four source registers
5788 const __m128i channel0A_u_16x8 = _mm_shuffle_epi8(pixelsA_u_8x16, shuffle0);
5789 const __m128i channel0B_u_16x8 = _mm_shuffle_epi8(pixelsB_u_8x16, shuffle0);
5790 const __m128i channel0C_u_16x8 = _mm_shuffle_epi8(pixelsC_u_8x16, shuffle0);
5791 const __m128i channel0D_u_16x8 = _mm_shuffle_epi8(pixelsD_u_8x16, shuffle0);
// extracting channel 1
5794 const __m128i channel1A_u_16x8 = _mm_shuffle_epi8(pixelsA_u_8x16, shuffle1);
5795 const __m128i channel1B_u_16x8 = _mm_shuffle_epi8(pixelsB_u_8x16, shuffle1);
5796 const __m128i channel1C_u_16x8 = _mm_shuffle_epi8(pixelsC_u_8x16, shuffle1);
5797 const __m128i channel1D_u_16x8 = _mm_shuffle_epi8(pixelsD_u_8x16, shuffle1);
// extracting channel 2
5800 const __m128i channel2A_u_16x8 = _mm_shuffle_epi8(pixelsA_u_8x16, shuffle2);
5801 const __m128i channel2B_u_16x8 = _mm_shuffle_epi8(pixelsB_u_8x16, shuffle2);
5802 const __m128i channel2C_u_16x8 = _mm_shuffle_epi8(pixelsC_u_8x16, shuffle2);
5803 const __m128i channel2D_u_16x8 = _mm_shuffle_epi8(pixelsD_u_8x16, shuffle2);
// extracting channel 3
5806 const __m128i channel3A_u_16x8 = _mm_shuffle_epi8(pixelsA_u_8x16, shuffle3);
5807 const __m128i channel3B_u_16x8 = _mm_shuffle_epi8(pixelsB_u_8x16, shuffle3);
5808 const __m128i channel3C_u_16x8 = _mm_shuffle_epi8(pixelsC_u_8x16, shuffle3);
5809 const __m128i channel3D_u_16x8 = _mm_shuffle_epi8(pixelsD_u_8x16, shuffle3);
// merging two half-filled registers into one: the second register is shifted by 8 bytes and combined with the first one
5812 const __m128i channel0_low_u_16x8 = _mm_or_si128(channel0A_u_16x8, _mm_slli_si128(channel0B_u_16x8, 8));
5813 const __m128i channel0_high_u_16x8 = _mm_or_si128(channel0C_u_16x8, _mm_slli_si128(channel0D_u_16x8, 8));
5815 const __m128i channel1_low_u_16x8 = _mm_or_si128(channel1A_u_16x8, _mm_slli_si128(channel1B_u_16x8, 8));
5816 const __m128i channel1_high_u_16x8 = _mm_or_si128(channel1C_u_16x8, _mm_slli_si128(channel1D_u_16x8, 8));
5818 const __m128i channel2_low_u_16x8 = _mm_or_si128(channel2A_u_16x8, _mm_slli_si128(channel2B_u_16x8, 8));
5819 const __m128i channel2_high_u_16x8 = _mm_or_si128(channel2C_u_16x8, _mm_slli_si128(channel2D_u_16x8, 8));
5821 const __m128i channel3_low_u_16x8 = _mm_or_si128(channel3A_u_16x8, _mm_slli_si128(channel3B_u_16x8, 8));
5822 const __m128i channel3_high_u_16x8 = _mm_or_si128(channel3C_u_16x8, _mm_slli_si128(channel3D_u_16x8, 8));
// 32 bit accumulators for the three target channels (A/B: two registers with four values each, for the low and high half);
// NOTE(review): they are declared without initializer and the statements filling them are not visible in this excerpt - confirm against the full file
5828 __m128i result0_low_A_s_32x4;
5829 __m128i result0_low_B_s_32x4;
5835 __m128i result0_high_A_s_32x4;
5836 __m128i result0_high_B_s_32x4;
5843 __m128i result1_low_A_s_32x4;
5844 __m128i result1_low_B_s_32x4;
5850 __m128i result1_high_A_s_32x4;
5851 __m128i result1_high_B_s_32x4;
5858 __m128i result2_low_A_s_32x4;
5859 __m128i result2_low_B_s_32x4;
5865 __m128i result2_high_A_s_32x4;
5866 __m128i result2_high_B_s_32x4;
// sign-extending the lower four 16 bit bias values to 32 bit;
// NOTE(review): the usage of the 32 bit bias values is not visible in this excerpt
5873 const __m128i biasChannel0_s_32x4 = _mm_cvtepi16_epi32(biasChannel0_s_16x8);
5874 const __m128i biasChannel1_s_32x4 = _mm_cvtepi16_epi32(biasChannel1_s_16x8);
5875 const __m128i biasChannel2_s_32x4 = _mm_cvtepi16_epi32(biasChannel2_s_16x8);
// narrowing the 32 bit results to signed 16 bit values with signed saturation
5896 const __m128i result0_low_s_16x8 = _mm_packs_epi32(result0_low_A_s_32x4, result0_low_B_s_32x4);
5897 const __m128i result0_high_s_16x8 = _mm_packs_epi32(result0_high_A_s_32x4, result0_high_B_s_32x4);
5899 const __m128i result1_low_s_16x8 = _mm_packs_epi32(result1_low_A_s_32x4, result1_low_B_s_32x4);
5900 const __m128i result1_high_s_16x8 = _mm_packs_epi32(result1_high_A_s_32x4, result1_high_B_s_32x4);
5902 const __m128i result2_low_s_16x8 = _mm_packs_epi32(result2_low_A_s_32x4, result2_low_B_s_32x4);
5903 const __m128i result2_high_s_16x8 = _mm_packs_epi32(result2_high_A_s_32x4, result2_high_B_s_32x4);
// narrowing the signed 16 bit values to unsigned 8 bit values with saturation to [0, 255]
5906 const __m128i result0_u_8x16 = _mm_packus_epi16(result0_low_s_16x8, result0_high_s_16x8);
5907 const __m128i result1_u_8x16 = _mm_packus_epi16(result1_low_s_16x8, result1_high_s_16x8);
5908 const __m128i result2_u_8x16 = _mm_packus_epi16(result2_low_s_16x8, result2_high_s_16x8);
// NOTE(review): the three registers below are declared without initializer and no assignment is visible in this excerpt;
// presumably an interleaving step of the three channel registers sits between the declaration and the stores - confirm against the full file
5910 __m128i resultA_u_8x16;
5911 __m128i resultB_u_8x16;
5912 __m128i resultC_u_8x16;
// unaligned store of the 48 target bytes
5916 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5917 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5918 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
// Body of an SSE conversion routine reading 4 * 16 bytes from 'source' and writing 16 bytes to 'target'.
// NOTE(review): the function signature and the definition of 'multiplicationFactors0123_128_s_32x4' are not visible in this excerpt;
// presumably this converts 16 pixels with 4 channels to 16 pixels with 1 channel with 7 bit precision - confirm against the full file
5923 ocean_assert(source !=
nullptr && target !=
nullptr);
// the value 64 in each 16 bit lane (0x0040), used as rounding offset before the right shift by 7 bits
5946 const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
// unaligned load of the 64 source bytes (four 128 bit registers)
5948 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5949 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5950 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5951 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 3);
// multiplying the unsigned 8 bit source values with the signed 8 bit factors and adding each pair of neighboring products (with signed saturation) to a 16 bit value
5956 const __m128i intermediateResults0_u_16x8 = _mm_maddubs_epi16(pixelsA_u_8x16, multiplicationFactors0123_128_s_32x4);
5957 const __m128i intermediateResults1_u_16x8 = _mm_maddubs_epi16(pixelsB_u_8x16, multiplicationFactors0123_128_s_32x4);
5958 const __m128i intermediateResults2_u_16x8 = _mm_maddubs_epi16(pixelsC_u_8x16, multiplicationFactors0123_128_s_32x4);
5959 const __m128i intermediateResults3_u_16x8 = _mm_maddubs_epi16(pixelsD_u_8x16, multiplicationFactors0123_128_s_32x4);
// adding neighboring 16 bit sums, so that each 16 bit lane holds the weighted sum of four consecutive source bytes
5962 __m128i grayA_u_16x8 = _mm_hadd_epi16(intermediateResults0_u_16x8, intermediateResults1_u_16x8);
5963 __m128i grayB_u_16x8 = _mm_hadd_epi16(intermediateResults2_u_16x8, intermediateResults3_u_16x8);
// adding 64 for rounding
5966 grayA_u_16x8 = _mm_add_epi16(grayA_u_16x8, constant64_u_8x16);
5967 grayB_u_16x8 = _mm_add_epi16(grayB_u_16x8, constant64_u_8x16);
// logical right shift by 7 bits, i.e., division by 128 for non-negative sums
5970 grayA_u_16x8 = _mm_srli_epi16(grayA_u_16x8, 7);
5971 grayB_u_16x8 = _mm_srli_epi16(grayB_u_16x8, 7);
// narrowing the sixteen 16 bit values to unsigned 8 bit values with saturation to [0, 255]
5978 const __m128i gray_u_8x16 = _mm_packus_epi16(grayA_u_16x8, grayB_u_16x8);
// unaligned store of the 16 target bytes
5981 _mm_storeu_si128((__m128i*)target, gray_u_8x16);
// Body of an SSE conversion routine reading 4 * 16 bytes from 'source' and writing 2 * 16 bytes to 'target'.
// NOTE(review): the function signature and the definitions of 'multiplicationFactorsChannel0_0123_128_s_16x8' and
// 'multiplicationFactorsChannel1_0123_128_s_16x8' are not visible in this excerpt; presumably this converts 16 pixels
// with 4 channels to 16 pixels with 2 channels with 7 bit precision - confirm against the full file
5986 ocean_assert(source !=
nullptr && target !=
nullptr);
// the value 64 in each 16 bit lane (0x0040), used as rounding offset before the right shift by 7 bits
6007 const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
// unaligned load of the 64 source bytes (four 128 bit registers)
6009 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
6010 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
6011 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
6012 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 3);
// zero-extending all 64 source bytes to 16 bit lanes, registers A..H hold eight values each in source order
6016 const __m128i pixelsA_u_16x8 = _mm_unpacklo_epi8(pixelsA_u_8x16, _mm_setzero_si128());
6017 const __m128i pixelsB_u_16x8 = _mm_unpackhi_epi8(pixelsA_u_8x16, _mm_setzero_si128());
6019 const __m128i pixelsC_u_16x8 = _mm_unpacklo_epi8(pixelsB_u_8x16, _mm_setzero_si128());
6020 const __m128i pixelsD_u_16x8 = _mm_unpackhi_epi8(pixelsB_u_8x16, _mm_setzero_si128());
6022 const __m128i pixelsE_u_16x8 = _mm_unpacklo_epi8(pixelsC_u_8x16, _mm_setzero_si128());
6023 const __m128i pixelsF_u_16x8 = _mm_unpackhi_epi8(pixelsC_u_8x16, _mm_setzero_si128());
6025 const __m128i pixelsG_u_16x8 = _mm_unpacklo_epi8(pixelsD_u_8x16, _mm_setzero_si128());
6026 const __m128i pixelsH_u_16x8 = _mm_unpackhi_epi8(pixelsD_u_8x16, _mm_setzero_si128());
// first target channel: multiplying the 16 bit values with the factors and adding each pair of neighboring products to a 32 bit value
6032 const __m128i intermediateResultsChannel0_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
6033 const __m128i intermediateResultsChannel0_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
6034 const __m128i intermediateResultsChannel0_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
6035 const __m128i intermediateResultsChannel0_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
6036 const __m128i intermediateResultsChannel0_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
6037 const __m128i intermediateResultsChannel0_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
6038 const __m128i intermediateResultsChannel0_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
6039 const __m128i intermediateResultsChannel0_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
// adding neighboring 32 bit sums, so that each 32 bit lane holds the weighted sum of four consecutive source bytes
6041 const __m128i resultsChannel0_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_0_u_32x4, intermediateResultsChannel0_1_u_32x4);
6042 const __m128i resultsChannel0_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_2_u_32x4, intermediateResultsChannel0_3_u_32x4);
6043 const __m128i resultsChannel0_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_4_u_32x4, intermediateResultsChannel0_5_u_32x4);
6044 const __m128i resultsChannel0_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_6_u_32x4, intermediateResultsChannel0_7_u_32x4);
// second target channel: the same computation with the second set of factors
6047 const __m128i intermediateResultsChannel1_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
6048 const __m128i intermediateResultsChannel1_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
6049 const __m128i intermediateResultsChannel1_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
6050 const __m128i intermediateResultsChannel1_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
6051 const __m128i intermediateResultsChannel1_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
6052 const __m128i intermediateResultsChannel1_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
6053 const __m128i intermediateResultsChannel1_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
6054 const __m128i intermediateResultsChannel1_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
6056 const __m128i resultsChannel1_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_0_u_32x4, intermediateResultsChannel1_1_u_32x4);
6057 const __m128i resultsChannel1_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_2_u_32x4, intermediateResultsChannel1_3_u_32x4);
6058 const __m128i resultsChannel1_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_4_u_32x4, intermediateResultsChannel1_5_u_32x4);
6059 const __m128i resultsChannel1_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_6_u_32x4, intermediateResultsChannel1_7_u_32x4);
// interleaving both target channels: the first channel stays in the lower 16 bits of each 32 bit lane, the second channel is moved to the upper 16 bits;
// NOTE(review): the bitwise OR is only a valid merge if the sums of the first channel fit into the lower 16 bits (non-negative and below 65536) - presumably guaranteed by the factors, confirm
6063 __m128i resultA_u_16x8 = _mm_or_si128(resultsChannel0_A_u_32x4, _mm_slli_epi32(resultsChannel1_A_u_32x4, 16));
6064 __m128i resultB_u_16x8 = _mm_or_si128(resultsChannel0_B_u_32x4, _mm_slli_epi32(resultsChannel1_B_u_32x4, 16));
6065 __m128i resultC_u_16x8 = _mm_or_si128(resultsChannel0_C_u_32x4, _mm_slli_epi32(resultsChannel1_C_u_32x4, 16));
6066 __m128i resultD_u_16x8 = _mm_or_si128(resultsChannel0_D_u_32x4, _mm_slli_epi32(resultsChannel1_D_u_32x4, 16));
// adding 64 for rounding
6069 resultA_u_16x8 = _mm_add_epi16(resultA_u_16x8, constant64_u_8x16);
6070 resultB_u_16x8 = _mm_add_epi16(resultB_u_16x8, constant64_u_8x16);
6071 resultC_u_16x8 = _mm_add_epi16(resultC_u_16x8, constant64_u_8x16);
6072 resultD_u_16x8 = _mm_add_epi16(resultD_u_16x8, constant64_u_8x16);
// logical right shift by 7 bits, i.e., division by 128 for non-negative sums
6075 resultA_u_16x8 = _mm_srli_epi16(resultA_u_16x8, 7);
6076 resultB_u_16x8 = _mm_srli_epi16(resultB_u_16x8, 7);
6077 resultC_u_16x8 = _mm_srli_epi16(resultC_u_16x8, 7);
6078 resultD_u_16x8 = _mm_srli_epi16(resultD_u_16x8, 7);
// narrowing the 16 bit values to unsigned 8 bit values with saturation to [0, 255]
6085 const __m128i resultAB_u_8x16 = _mm_packus_epi16(resultA_u_16x8, resultB_u_16x8);
6086 const __m128i resultCD_u_8x16 = _mm_packus_epi16(resultC_u_16x8, resultD_u_16x8);
// unaligned store of the 32 target bytes
6089 _mm_storeu_si128((__m128i*)target + 0, resultAB_u_8x16);
6090 _mm_storeu_si128((__m128i*)target + 1, resultCD_u_8x16);
6095#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON routine reading 8 interleaved 3-channel 8-bit elements (24 bytes) from 'source' and writing 8 bytes to 'target'.
// Each template flag decides at compile time whether the corresponding source channel contributes to the result.
// NOTE(review): the function signature, the braces, the 'else' branch, and the definitions of 'factorChannel<n>_128_u_8x8'
// are not visible in this excerpt; the '_128' suffix presumably means factors pre-multiplied by 128 (7 bit precision) - confirm against the full file
6097template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
// at least one source channel must contribute to the result
6100 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2,
"Invalid multiplication factors!");
6102 ocean_assert(source !=
nullptr && target !=
nullptr);
// de-interleaving load: val[0], val[1], val[2] hold the first, second, and third channel of 8 elements
6121 uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
6123 uint16x8_t intermediateResults_u_16x8;
// first channel: widening multiplication (8 bit * 8 bit -> 16 bit) initializes the accumulator
6127 if constexpr (tUseFactorChannel0)
6129 intermediateResults_u_16x8 = vmull_u8(source_u_8x8x3.val[0], factorChannel0_128_u_8x8);
// presumably the 'else' branch of the condition above: the accumulator starts with zero if the first channel is not used
6133 intermediateResults_u_16x8 = vdupq_n_u16(0u);
// second channel: widening multiply-accumulate
6138 if constexpr (tUseFactorChannel1)
6140 intermediateResults_u_16x8 = vmlal_u8(intermediateResults_u_16x8, source_u_8x8x3.val[1], factorChannel1_128_u_8x8);
// third channel: widening multiply-accumulate
6145 if constexpr (tUseFactorChannel2)
6147 intermediateResults_u_16x8 = vmlal_u8(intermediateResults_u_16x8, source_u_8x8x3.val[2], factorChannel2_128_u_8x8);
// rounding right shift by 7 bits (division by 128) with saturating narrowing to unsigned 8 bit
6151 uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_u_16x8, 7);
// storing the 8 result bytes
6154 vst1_u8(target, results_u_8x8);
// Converts 8 interleaved 3-channel 8-bit elements into 8 interleaved 3-channel 8-bit elements with NEON.
// The function reads 24 bytes from 'source' and writes 24 bytes to 'target'.
// The bias is subtracted from the source channels first, afterwards each target channel is a linear combination of the three corrected source channels.
// The factor parameters are named factorChannel<target><source>; the '_64' suffix matches the final right shift by 6 bits (division by 64).
6157OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8)
6159 ocean_assert(source !=
nullptr && target !=
nullptr);
// de-interleaving load: val[0], val[1], val[2] hold the first, second, and third channel of 8 elements
6179 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
// widening subtraction of the bias (8 bit -> 16 bit); the unsigned result wraps around for negative differences,
// so reinterpreting it as signed 16 bit provides the correct signed difference
6182 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[0], biasChannel0_u_8x8));
6183 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[1], biasChannel1_u_8x8));
6184 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[2], biasChannel2_u_8x8));
// contribution of the first source channel to each of the three target channels (the multiplication keeps the lower 16 bits only)
6188 int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_64_s_16x8);
6189 int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_64_s_16x8);
6190 int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_64_s_16x8);
// adding the contribution of the second source channel, the addition saturates instead of wrapping around
6192 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, vmulq_s16(source1_s_16x8, factorChannel01_64_s_16x8));
6193 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, vmulq_s16(source1_s_16x8, factorChannel11_64_s_16x8));
6194 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, vmulq_s16(source1_s_16x8, factorChannel21_64_s_16x8));
// adding the contribution of the third source channel, again with saturation
6196 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, vmulq_s16(source2_s_16x8, factorChannel02_64_s_16x8));
6197 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, vmulq_s16(source2_s_16x8, factorChannel12_64_s_16x8));
6198 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, vmulq_s16(source2_s_16x8, factorChannel22_64_s_16x8));
6200 uint8x8x3_t results_u_8x8x3;
// rounding right shift by 6 bits (division by 64) with saturating narrowing from signed 16 bit to unsigned 8 bit
6203 results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 6);
6204 results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 6);
6205 results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 6);
// interleaving store of the three channels (24 bytes)
6208 vst3_u8(target, results_u_8x8x3);
// Converts 16 interleaved 3-channel 8-bit elements into 16 interleaved 3-channel 8-bit elements with NEON.
// The function reads 48 bytes from 'source' and writes 48 bytes to 'target'.
// The bias is subtracted from the source channels first, afterwards each target channel is a linear combination of the three corrected source channels.
// The factor parameters are named factorChannel<target><source>; the '_64' suffix matches the final right shift by 6 bits (division by 64).
// The 16 elements are processed as a lower and an upper half with 8 elements each.
6211OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8)
6213 ocean_assert(source !=
nullptr && target !=
nullptr);
// de-interleaving load: val[0], val[1], val[2] hold the first, second, and third channel of 16 elements
6228 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
// lower 8 elements: widening subtraction of the bias (8 bit -> 16 bit); the unsigned result wraps around for negative differences,
// so reinterpreting it as signed 16 bit provides the correct signed difference
6231 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6232 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6233 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
// upper 8 elements: the same bias subtraction
6235 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6236 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6237 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
// contribution of the first source channel to each of the three target channels (the multiplication keeps the lower 16 bits only)
6241 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
6242 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
6243 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
6245 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
6246 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
6247 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
// adding the contribution of the second source channel, the addition saturates instead of wrapping around
6249 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8));
6250 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
6251 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
6253 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
6254 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
6255 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
// adding the contribution of the third source channel, again with saturation
6257 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
6258 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
6259 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
6261 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
6262 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
6263 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
6265 uint8x16x3_t results_u_8x16x3;
// rounding right shift by 6 bits (division by 64) with saturating narrowing to unsigned 8 bit, lower and upper half are joined again
6268 results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
6269 results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
6270 results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
// interleaving store of the three channels (48 bytes)
6273 vst3q_u8(target, results_u_8x16x3);
// Converts 8 interleaved 3-channel 8-bit elements into 8 interleaved 3-channel 8-bit elements with NEON.
// The function reads 24 bytes from 'source' and writes 24 bytes to 'target'.
// Each target channel is a linear combination of the three source channels, the bias is added after the multiplication.
// The factor parameters are named factorChannel<target><source>; the '_128' suffix matches the final right shift by 7 bits (division by 128).
// The bias parameters carry the '_128' suffix as well, presumably they are pre-multiplied by 128 - TODO confirm against the declaration's documentation.
6276OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8)
6278 ocean_assert(source !=
nullptr && target !=
nullptr);
// de-interleaving load: val[0], val[1], val[2] hold the first, second, and third channel of 8 elements
6298 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
// zero-extending the 8 bit values to 16 bit, the values are in [0, 255] so the signed reinterpretation does not change them
6300 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
6301 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
6302 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
// contribution of the first source channel to each of the three target channels (the multiplication keeps the lower 16 bits only)
6304 int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_128_s_16x8);
6305 int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_128_s_16x8);
6306 int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_128_s_16x8);
// multiply-accumulate of the second source channel, without saturation
6308 intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source1_s_16x8, factorChannel01_128_s_16x8);
6309 intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source1_s_16x8, factorChannel11_128_s_16x8);
6310 intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source1_s_16x8, factorChannel21_128_s_16x8);
// multiply-accumulate of the third source channel, without saturation
6312 intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source2_s_16x8, factorChannel02_128_s_16x8);
6313 intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source2_s_16x8, factorChannel12_128_s_16x8);
6314 intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source2_s_16x8, factorChannel22_128_s_16x8);
// adding the bias with saturation
6318 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, biasChannel0_128_s_16x8);
6319 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, biasChannel1_128_s_16x8);
6320 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, biasChannel2_128_s_16x8);
6322 uint8x8x3_t results_u_8x8x3;
// rounding right shift by 7 bits (division by 128) with saturating narrowing from signed 16 bit to unsigned 8 bit
6325 results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 7);
6326 results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 7);
6327 results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 7);
// interleaving store of the three channels (24 bytes)
6330 vst3_u8(target, results_u_8x8x3);
// Converts 8 interleaved 3-channel 8-bit elements into 8 interleaved 3-channel 8-bit elements with NEON, using 32 bit accumulators.
// The function reads 24 bytes from 'source' and writes 24 bytes to 'target'.
// Each target channel is a linear combination of the three source channels, the bias is added after the multiplication.
// The factor parameters are named factorChannel<target><source>; the '_1024' suffix matches the final right shift by 10 bits (division by 1024).
// The bias parameters carry the '_1024' suffix as well, presumably they are pre-multiplied by 1024 - TODO confirm against the declaration's documentation.
6333OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4)
6335 ocean_assert(source !=
nullptr && target !=
nullptr);
// de-interleaving load: val[0], val[1], val[2] hold the first, second, and third channel of 8 elements
6356 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
// zero-extending the 8 bit values to 16 bit, the values are in [0, 255] so the signed reinterpretation does not change them
6358 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
6359 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
6360 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
// first source channel: splitting into two halves with 4 elements each, followed by widening multiplications (16 bit * 16 bit -> 32 bit)
6362 const int16x4_t source0_low_s_16x4 = vget_low_s16(source0_s_16x8);
6363 const int16x4_t source0_high_s_16x4 = vget_high_s16(source0_s_16x8);
6365 int32x4_t intermediateResults0_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel00_1024_s_16x4);
6366 int32x4_t intermediateResults0_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel00_1024_s_16x4);
6368 int32x4_t intermediateResults1_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel10_1024_s_16x4);
6369 int32x4_t intermediateResults1_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel10_1024_s_16x4);
6371 int32x4_t intermediateResults2_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel20_1024_s_16x4);
6372 int32x4_t intermediateResults2_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel20_1024_s_16x4);
// second source channel: widening multiply-accumulate
6375 const int16x4_t source1_low_s_16x4 = vget_low_s16(source1_s_16x8);
6376 const int16x4_t source1_high_s_16x4 = vget_high_s16(source1_s_16x8);
6378 intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source1_low_s_16x4, factorChannel01_1024_s_16x4);
6379 intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source1_high_s_16x4, factorChannel01_1024_s_16x4);
6381 intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source1_low_s_16x4, factorChannel11_1024_s_16x4);
6382 intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source1_high_s_16x4, factorChannel11_1024_s_16x4);
6384 intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source1_low_s_16x4, factorChannel21_1024_s_16x4);
6385 intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source1_high_s_16x4, factorChannel21_1024_s_16x4);
// third source channel: widening multiply-accumulate
6388 const int16x4_t source2_low_s_16x4 = vget_low_s16(source2_s_16x8);
6389 const int16x4_t source2_high_s_16x4 = vget_high_s16(source2_s_16x8);
6391 intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source2_low_s_16x4, factorChannel02_1024_s_16x4);
6392 intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source2_high_s_16x4, factorChannel02_1024_s_16x4);
6394 intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source2_low_s_16x4, factorChannel12_1024_s_16x4);
6395 intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source2_high_s_16x4, factorChannel12_1024_s_16x4);
6397 intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source2_low_s_16x4, factorChannel22_1024_s_16x4);
6398 intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source2_high_s_16x4, factorChannel22_1024_s_16x4);
// adding the bias to the 32 bit sums (without saturation)
6403 intermediateResults0_low_s_32x4 = vaddq_s32(intermediateResults0_low_s_32x4, biasChannel0_1024_s_32x4);
6404 intermediateResults0_high_s_32x4 = vaddq_s32(intermediateResults0_high_s_32x4, biasChannel0_1024_s_32x4);
6406 intermediateResults1_low_s_32x4 = vaddq_s32(intermediateResults1_low_s_32x4, biasChannel1_1024_s_32x4);
6407 intermediateResults1_high_s_32x4 = vaddq_s32(intermediateResults1_high_s_32x4, biasChannel1_1024_s_32x4);
6409 intermediateResults2_low_s_32x4 = vaddq_s32(intermediateResults2_low_s_32x4, biasChannel2_1024_s_32x4);
6410 intermediateResults2_high_s_32x4 = vaddq_s32(intermediateResults2_high_s_32x4, biasChannel2_1024_s_32x4);
6413 uint8x8x3_t results_u_8x8x3;
// rounding right shift by 10 bits (division by 1024) with saturating narrowing to unsigned 16 bit,
// joining both halves, followed by a saturating narrowing to unsigned 8 bit
6416 results_u_8x8x3.val[0] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_high_s_32x4, 10)));
6417 results_u_8x8x3.val[1] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_high_s_32x4, 10)));
6418 results_u_8x8x3.val[2] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_high_s_32x4, 10)));
// interleaving store of the three channels (24 bytes)
6421 vst3_u8(target, results_u_8x8x3);
6424OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4)
6426 ocean_assert(source !=
nullptr && target !=
nullptr);
6447 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6449 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6450 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6451 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
6453 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6454 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6455 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
6457 const int16x4_t source0_A_s_16x4 = vget_low_s16(source0_low_s_16x8);
6458 const int16x4_t source0_B_s_16x4 = vget_high_s16(source0_low_s_16x8);
6459 const int16x4_t source0_C_s_16x4 = vget_low_s16(source0_high_s_16x8);
6460 const int16x4_t source0_D_s_16x4 = vget_high_s16(source0_high_s_16x8);
6462 int32x4_t intermediateResults0_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel00_1024_s_16x4);
6463 int32x4_t intermediateResults0_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel00_1024_s_16x4);
6464 int32x4_t intermediateResults0_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel00_1024_s_16x4);
6465 int32x4_t intermediateResults0_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel00_1024_s_16x4);
6467 int32x4_t intermediateResults1_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel10_1024_s_16x4);
6468 int32x4_t intermediateResults1_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel10_1024_s_16x4);
6469 int32x4_t intermediateResults1_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel10_1024_s_16x4);
6470 int32x4_t intermediateResults1_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel10_1024_s_16x4);
6472 int32x4_t intermediateResults2_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel20_1024_s_16x4);
6473 int32x4_t intermediateResults2_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel20_1024_s_16x4);
6474 int32x4_t intermediateResults2_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel20_1024_s_16x4);
6475 int32x4_t intermediateResults2_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel20_1024_s_16x4);
6478 const int16x4_t source1_A_s_16x4 = vget_low_s16(source1_low_s_16x8);
6479 const int16x4_t source1_B_s_16x4 = vget_high_s16(source1_low_s_16x8);
6480 const int16x4_t source1_C_s_16x4 = vget_low_s16(source1_high_s_16x8);
6481 const int16x4_t source1_D_s_16x4 = vget_high_s16(source1_high_s_16x8);
6483 intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source1_A_s_16x4, factorChannel01_1024_s_16x4);
6484 intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source1_B_s_16x4, factorChannel01_1024_s_16x4);
6485 intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source1_C_s_16x4, factorChannel01_1024_s_16x4);
6486 intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source1_D_s_16x4, factorChannel01_1024_s_16x4);
6488 intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source1_A_s_16x4, factorChannel11_1024_s_16x4);
6489 intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source1_B_s_16x4, factorChannel11_1024_s_16x4);
6490 intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source1_C_s_16x4, factorChannel11_1024_s_16x4);
6491 intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source1_D_s_16x4, factorChannel11_1024_s_16x4);
6493 intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source1_A_s_16x4, factorChannel21_1024_s_16x4);
6494 intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source1_B_s_16x4, factorChannel21_1024_s_16x4);
6495 intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source1_C_s_16x4, factorChannel21_1024_s_16x4);
6496 intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source1_D_s_16x4, factorChannel21_1024_s_16x4);
6499 const int16x4_t source2_A_s_16x4 = vget_low_s16(source2_low_s_16x8);
6500 const int16x4_t source2_B_s_16x4 = vget_high_s16(source2_low_s_16x8);
6501 const int16x4_t source2_C_s_16x4 = vget_low_s16(source2_high_s_16x8);
6502 const int16x4_t source2_D_s_16x4 = vget_high_s16(source2_high_s_16x8);
6504 intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source2_A_s_16x4, factorChannel02_1024_s_16x4);
6505 intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source2_B_s_16x4, factorChannel02_1024_s_16x4);
6506 intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source2_C_s_16x4, factorChannel02_1024_s_16x4);
6507 intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source2_D_s_16x4, factorChannel02_1024_s_16x4);
6509 intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source2_A_s_16x4, factorChannel12_1024_s_16x4);
6510 intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source2_B_s_16x4, factorChannel12_1024_s_16x4);
6511 intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source2_C_s_16x4, factorChannel12_1024_s_16x4);
6512 intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source2_D_s_16x4, factorChannel12_1024_s_16x4);
6514 intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source2_A_s_16x4, factorChannel22_1024_s_16x4);
6515 intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source2_B_s_16x4, factorChannel22_1024_s_16x4);
6516 intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source2_C_s_16x4, factorChannel22_1024_s_16x4);
6517 intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source2_D_s_16x4, factorChannel22_1024_s_16x4);
6522 intermediateResults0_A_s_32x4 = vaddq_s32(intermediateResults0_A_s_32x4, biasChannel0_1024_s_32x4);
6523 intermediateResults0_B_s_32x4 = vaddq_s32(intermediateResults0_B_s_32x4, biasChannel0_1024_s_32x4);
6524 intermediateResults0_C_s_32x4 = vaddq_s32(intermediateResults0_C_s_32x4, biasChannel0_1024_s_32x4);
6525 intermediateResults0_D_s_32x4 = vaddq_s32(intermediateResults0_D_s_32x4, biasChannel0_1024_s_32x4);
6527 intermediateResults1_A_s_32x4 = vaddq_s32(intermediateResults1_A_s_32x4, biasChannel1_1024_s_32x4);
6528 intermediateResults1_B_s_32x4 = vaddq_s32(intermediateResults1_B_s_32x4, biasChannel1_1024_s_32x4);
6529 intermediateResults1_C_s_32x4 = vaddq_s32(intermediateResults1_C_s_32x4, biasChannel1_1024_s_32x4);
6530 intermediateResults1_D_s_32x4 = vaddq_s32(intermediateResults1_D_s_32x4, biasChannel1_1024_s_32x4);
6532 intermediateResults2_A_s_32x4 = vaddq_s32(intermediateResults2_A_s_32x4, biasChannel2_1024_s_32x4);
6533 intermediateResults2_B_s_32x4 = vaddq_s32(intermediateResults2_B_s_32x4, biasChannel2_1024_s_32x4);
6534 intermediateResults2_C_s_32x4 = vaddq_s32(intermediateResults2_C_s_32x4, biasChannel2_1024_s_32x4);
6535 intermediateResults2_D_s_32x4 = vaddq_s32(intermediateResults2_D_s_32x4, biasChannel2_1024_s_32x4);
6538 uint8x16x3_t results_u_8x16x3;
6541 results_u_8x16x3.val[0] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_D_s_32x4, 10))));
6543 results_u_8x16x3.val[1] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_D_s_32x4, 10))));
6544 results_u_8x16x3.val[2] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_D_s_32x4, 10))));
6547 vst3q_u8(target, results_u_8x16x3);
6550OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8)
6552 ocean_assert(source !=
nullptr && target !=
nullptr);
6572 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6574 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6575 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6576 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
6578 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6579 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6580 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
6583 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_128_s_16x8);
6584 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_128_s_16x8);
6585 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_128_s_16x8);
6587 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_128_s_16x8);
6588 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_128_s_16x8);
6589 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_128_s_16x8);
6592 intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source1_low_s_16x8, factorChannel01_128_s_16x8);
6593 intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source1_low_s_16x8, factorChannel11_128_s_16x8);
6594 intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source1_low_s_16x8, factorChannel21_128_s_16x8);
6596 intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source1_high_s_16x8, factorChannel01_128_s_16x8);
6597 intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source1_high_s_16x8, factorChannel11_128_s_16x8);
6598 intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source1_high_s_16x8, factorChannel21_128_s_16x8);
6601 intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source2_low_s_16x8, factorChannel02_128_s_16x8);
6602 intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source2_low_s_16x8, factorChannel12_128_s_16x8);
6603 intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source2_low_s_16x8, factorChannel22_128_s_16x8);
6605 intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source2_high_s_16x8, factorChannel02_128_s_16x8);
6606 intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source2_high_s_16x8, factorChannel12_128_s_16x8);
6607 intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source2_high_s_16x8, factorChannel22_128_s_16x8);
6611 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, biasChannel0_128_s_16x8);
6612 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, biasChannel0_128_s_16x8);
6614 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, biasChannel1_128_s_16x8);
6615 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, biasChannel1_128_s_16x8);
6617 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, biasChannel2_128_s_16x8);
6618 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, biasChannel2_128_s_16x8);
6621 uint8x16x3_t results_u_8x16x3;
6624 results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 7));
6625 results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 7));
6626 results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 7));
6629 vst3q_u8(target, results_u_8x16x3);
6632OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8,
const uint8x16_t& channelValue3_u_8x16)
6634 ocean_assert(source !=
nullptr && target !=
nullptr);
6649 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6652 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6653 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6654 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
6656 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6657 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6658 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
6662 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
6663 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
6664 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
6666 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
6667 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
6668 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
6670 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8));
6671 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
6672 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
6674 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
6675 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
6676 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
6678 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
6679 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
6680 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
6682 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
6683 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
6684 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
6686 uint8x16x4_t results_u_8x16x4;
6689 results_u_8x16x4.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
6690 results_u_8x16x4.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
6691 results_u_8x16x4.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
6692 results_u_8x16x4.val[3] = channelValue3_u_8x16;
6695 vst4q_u8(target, results_u_8x16x4);
// NOTE(review): this listing is incomplete for this template function: the line holding the function name and
// the parameter list, the brace lines, and (presumably) an 'else' line are not visible here.
// The body below reads the parameters 'source', 'target', and 'factorChannel0_128_u_8x8' ... 'factorChannel3_128_u_8x8'.
//
// The visible code converts 8 interleaved 4-channel pixels (32 bytes read via vld4_u8) to 8 single-channel values (8 bytes written via vst1_u8):
//   target = (sum of source_i * factor_i over all enabled channels i + 64) >> 7, saturated to 8 bit
// Each boolean template parameter decides at compile time whether the corresponding source channel contributes to the sum.
6698template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
// at least one source channel must contribute to the result
6701 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3,
"Invalid multiplication factors!");
6703 ocean_assert(source !=
nullptr && target !=
nullptr);
// de-interleaving the 8 pixels, val[i] holds the 8 values of source channel i
6723 uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
// unsigned 16 bit accumulator, initialized by one of the two following assignments
6725 uint16x8_t intermediateResults_16x8;
6729 if constexpr (tUseFactorChannel0)
// widening multiplication (8 bit * 8 bit -> 16 bit) initializing the accumulator
6731 intermediateResults_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel0_128_u_8x8);
// NOTE(review): presumably the 'else' branch of the check above (the 'else' keyword is not visible in this listing),
// starting with a zero accumulator when the first channel is not used - confirm against the original file
6735 intermediateResults_16x8 = vdupq_n_u16(0u);
6740 if constexpr (tUseFactorChannel1)
// widening multiply-accumulate for the second source channel
6742 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[1], factorChannel1_128_u_8x8);
6747 if constexpr (tUseFactorChannel2)
// widening multiply-accumulate for the third source channel
6749 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[2], factorChannel2_128_u_8x8);
6754 if constexpr (tUseFactorChannel3)
// widening multiply-accumulate for the fourth source channel
6756 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[3], factorChannel3_128_u_8x8);
// rounding right shift by 7 bits with saturated narrowing to 8 bit
6760 uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_16x8, 7);
// storing the 8 resulting values
6763 vst1_u8(target, results_u_8x8);
6766OCEAN_FORCE_INLINE
void FrameChannels::convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const uint8x8_t& factorChannel00_128_u_8x8,
const uint8x8_t& factorChannel10_128_u_8x8,
const uint8x8_t& factorChannel01_128_u_8x8,
const uint8x8_t& factorChannel11_128_u_8x8,
const uint8x8_t& factorChannel02_128_u_8x8,
const uint8x8_t& factorChannel12_128_u_8x8,
const uint8x8_t& factorChannel03_128_u_8x8,
const uint8x8_t& factorChannel13_128_u_8x8)
6768 ocean_assert(source !=
nullptr && target !=
nullptr);
6790 uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
6792 uint16x8_t intermediateResultsChannel0_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel00_128_u_8x8);
6793 uint16x8_t intermediateResultsChannel1_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel10_128_u_8x8);
6795 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[1], factorChannel01_128_u_8x8);
6796 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[1], factorChannel11_128_u_8x8);
6798 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[2], factorChannel02_128_u_8x8);
6799 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[2], factorChannel12_128_u_8x8);
6801 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[3], factorChannel03_128_u_8x8);
6802 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[3], factorChannel13_128_u_8x8);
6804 uint8x8x2_t results_u_8x8x2;
6808 results_u_8x8x2.val[0] = vqrshrn_n_u16(intermediateResultsChannel0_16x8, 7);
6809 results_u_8x8x2.val[1] = vqrshrn_n_u16(intermediateResultsChannel1_16x8, 7);
6812 vst2_u8(target, results_u_8x8x2);
6815OCEAN_FORCE_INLINE
void FrameChannels::convert4ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& factorChannel03_128_s_16x8,
const int16x8_t& factorChannel13_128_s_16x8,
const int16x8_t& factorChannel23_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8)
6817 ocean_assert(source !=
nullptr && target !=
nullptr);
6832 const uint8x16x4_t source_u_8x16x4 = vld4q_u8(source);
6836 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x4.val[0])));
6837 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x4.val[1])));
6838 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x4.val[2])));
6839 const int16x8_t source3_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x4.val[3])));
6841 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x4.val[0])));
6842 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x4.val[1])));
6843 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x4.val[2])));
6844 const int16x8_t source3_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x4.val[3])));
6849 const int16x4_t factorChannel00_128_s_16x4 = vget_low_s16(factorChannel00_128_s_16x8);
6850 const int16x4_t factorChannel10_128_s_16x4 = vget_low_s16(factorChannel10_128_s_16x8);
6851 const int16x4_t factorChannel20_128_s_16x4 = vget_low_s16(factorChannel20_128_s_16x8);
6853 const int16x4_t factorChannel01_128_s_16x4 = vget_low_s16(factorChannel01_128_s_16x8);
6854 const int16x4_t factorChannel11_128_s_16x4 = vget_low_s16(factorChannel11_128_s_16x8);
6855 const int16x4_t factorChannel21_128_s_16x4 = vget_low_s16(factorChannel21_128_s_16x8);
6857 const int16x4_t factorChannel02_128_s_16x4 = vget_low_s16(factorChannel02_128_s_16x8);
6858 const int16x4_t factorChannel12_128_s_16x4 = vget_low_s16(factorChannel12_128_s_16x8);
6859 const int16x4_t factorChannel22_128_s_16x4 = vget_low_s16(factorChannel22_128_s_16x8);
6861 const int16x4_t factorChannel03_128_s_16x4 = vget_low_s16(factorChannel03_128_s_16x8);
6862 const int16x4_t factorChannel13_128_s_16x4 = vget_low_s16(factorChannel13_128_s_16x8);
6863 const int16x4_t factorChannel23_128_s_16x4 = vget_low_s16(factorChannel23_128_s_16x8);
6867 const int16x4_t source0_low_low_s_16x4 = vget_low_s16(source0_low_s_16x8);
6868 const int16x4_t source0_low_high_s_16x4 = vget_high_s16(source0_low_s_16x8);
6869 const int16x4_t source1_low_low_s_16x4 = vget_low_s16(source1_low_s_16x8);
6870 const int16x4_t source1_low_high_s_16x4 = vget_high_s16(source1_low_s_16x8);
6871 const int16x4_t source2_low_low_s_16x4 = vget_low_s16(source2_low_s_16x8);
6872 const int16x4_t source2_low_high_s_16x4 = vget_high_s16(source2_low_s_16x8);
6873 const int16x4_t source3_low_low_s_16x4 = vget_low_s16(source3_low_s_16x8);
6874 const int16x4_t source3_low_high_s_16x4 = vget_high_s16(source3_low_s_16x8);
6876 int32x4_t intermediateResults0_low_low_s_32x4 = vmull_s16(source0_low_low_s_16x4, factorChannel00_128_s_16x4);
6877 int32x4_t intermediateResults0_low_high_s_32x4 = vmull_s16(source0_low_high_s_16x4, factorChannel00_128_s_16x4);
6878 int32x4_t intermediateResults1_low_low_s_32x4 = vmull_s16(source0_low_low_s_16x4, factorChannel10_128_s_16x4);
6879 int32x4_t intermediateResults1_low_high_s_32x4 = vmull_s16(source0_low_high_s_16x4, factorChannel10_128_s_16x4);
6880 int32x4_t intermediateResults2_low_low_s_32x4 = vmull_s16(source0_low_low_s_16x4, factorChannel20_128_s_16x4);
6881 int32x4_t intermediateResults2_low_high_s_32x4 = vmull_s16(source0_low_high_s_16x4, factorChannel20_128_s_16x4);
6883 intermediateResults0_low_low_s_32x4 = vmlal_s16(intermediateResults0_low_low_s_32x4, source1_low_low_s_16x4, factorChannel01_128_s_16x4);
6884 intermediateResults0_low_high_s_32x4 = vmlal_s16(intermediateResults0_low_high_s_32x4, source1_low_high_s_16x4, factorChannel01_128_s_16x4);
6885 intermediateResults1_low_low_s_32x4 = vmlal_s16(intermediateResults1_low_low_s_32x4, source1_low_low_s_16x4, factorChannel11_128_s_16x4);
6886 intermediateResults1_low_high_s_32x4 = vmlal_s16(intermediateResults1_low_high_s_32x4, source1_low_high_s_16x4, factorChannel11_128_s_16x4);
6887 intermediateResults2_low_low_s_32x4 = vmlal_s16(intermediateResults2_low_low_s_32x4, source1_low_low_s_16x4, factorChannel21_128_s_16x4);
6888 intermediateResults2_low_high_s_32x4 = vmlal_s16(intermediateResults2_low_high_s_32x4, source1_low_high_s_16x4, factorChannel21_128_s_16x4);
6890 intermediateResults0_low_low_s_32x4 = vmlal_s16(intermediateResults0_low_low_s_32x4, source2_low_low_s_16x4, factorChannel02_128_s_16x4);
6891 intermediateResults0_low_high_s_32x4 = vmlal_s16(intermediateResults0_low_high_s_32x4, source2_low_high_s_16x4, factorChannel02_128_s_16x4);
6892 intermediateResults1_low_low_s_32x4 = vmlal_s16(intermediateResults1_low_low_s_32x4, source2_low_low_s_16x4, factorChannel12_128_s_16x4);
6893 intermediateResults1_low_high_s_32x4 = vmlal_s16(intermediateResults1_low_high_s_32x4, source2_low_high_s_16x4, factorChannel12_128_s_16x4);
6894 intermediateResults2_low_low_s_32x4 = vmlal_s16(intermediateResults2_low_low_s_32x4, source2_low_low_s_16x4, factorChannel22_128_s_16x4);
6895 intermediateResults2_low_high_s_32x4 = vmlal_s16(intermediateResults2_low_high_s_32x4, source2_low_high_s_16x4, factorChannel22_128_s_16x4);
6897 intermediateResults0_low_low_s_32x4 = vmlal_s16(intermediateResults0_low_low_s_32x4, source3_low_low_s_16x4, factorChannel03_128_s_16x4);
6898 intermediateResults0_low_high_s_32x4 = vmlal_s16(intermediateResults0_low_high_s_32x4, source3_low_high_s_16x4, factorChannel03_128_s_16x4);
6899 intermediateResults1_low_low_s_32x4 = vmlal_s16(intermediateResults1_low_low_s_32x4, source3_low_low_s_16x4, factorChannel13_128_s_16x4);
6900 intermediateResults1_low_high_s_32x4 = vmlal_s16(intermediateResults1_low_high_s_32x4, source3_low_high_s_16x4, factorChannel13_128_s_16x4);
6901 intermediateResults2_low_low_s_32x4 = vmlal_s16(intermediateResults2_low_low_s_32x4, source3_low_low_s_16x4, factorChannel23_128_s_16x4);
6902 intermediateResults2_low_high_s_32x4 = vmlal_s16(intermediateResults2_low_high_s_32x4, source3_low_high_s_16x4, factorChannel23_128_s_16x4);
6906 const int16x4_t source0_high_low_s_16x4 = vget_low_s16(source0_high_s_16x8);
6907 const int16x4_t source0_high_high_s_16x4 = vget_high_s16(source0_high_s_16x8);
6908 const int16x4_t source1_high_low_s_16x4 = vget_low_s16(source1_high_s_16x8);
6909 const int16x4_t source1_high_high_s_16x4 = vget_high_s16(source1_high_s_16x8);
6910 const int16x4_t source2_high_low_s_16x4 = vget_low_s16(source2_high_s_16x8);
6911 const int16x4_t source2_high_high_s_16x4 = vget_high_s16(source2_high_s_16x8);
6912 const int16x4_t source3_high_low_s_16x4 = vget_low_s16(source3_high_s_16x8);
6913 const int16x4_t source3_high_high_s_16x4 = vget_high_s16(source3_high_s_16x8);
6915 int32x4_t intermediateResults0_high_low_s_32x4 = vmull_s16(source0_high_low_s_16x4, factorChannel00_128_s_16x4);
6916 int32x4_t intermediateResults0_high_high_s_32x4 = vmull_s16(source0_high_high_s_16x4, factorChannel00_128_s_16x4);
6917 int32x4_t intermediateResults1_high_low_s_32x4 = vmull_s16(source0_high_low_s_16x4, factorChannel10_128_s_16x4);
6918 int32x4_t intermediateResults1_high_high_s_32x4 = vmull_s16(source0_high_high_s_16x4, factorChannel10_128_s_16x4);
6919 int32x4_t intermediateResults2_high_low_s_32x4 = vmull_s16(source0_high_low_s_16x4, factorChannel20_128_s_16x4);
6920 int32x4_t intermediateResults2_high_high_s_32x4 = vmull_s16(source0_high_high_s_16x4, factorChannel20_128_s_16x4);
6922 intermediateResults0_high_low_s_32x4 = vmlal_s16(intermediateResults0_high_low_s_32x4, source1_high_low_s_16x4, factorChannel01_128_s_16x4);
6923 intermediateResults0_high_high_s_32x4 = vmlal_s16(intermediateResults0_high_high_s_32x4, source1_high_high_s_16x4, factorChannel01_128_s_16x4);
6924 intermediateResults1_high_low_s_32x4 = vmlal_s16(intermediateResults1_high_low_s_32x4, source1_high_low_s_16x4, factorChannel11_128_s_16x4);
6925 intermediateResults1_high_high_s_32x4 = vmlal_s16(intermediateResults1_high_high_s_32x4, source1_high_high_s_16x4, factorChannel11_128_s_16x4);
6926 intermediateResults2_high_low_s_32x4 = vmlal_s16(intermediateResults2_high_low_s_32x4, source1_high_low_s_16x4, factorChannel21_128_s_16x4);
6927 intermediateResults2_high_high_s_32x4 = vmlal_s16(intermediateResults2_high_high_s_32x4, source1_high_high_s_16x4, factorChannel21_128_s_16x4);
6929 intermediateResults0_high_low_s_32x4 = vmlal_s16(intermediateResults0_high_low_s_32x4, source2_high_low_s_16x4, factorChannel02_128_s_16x4);
6930 intermediateResults0_high_high_s_32x4 = vmlal_s16(intermediateResults0_high_high_s_32x4, source2_high_high_s_16x4, factorChannel02_128_s_16x4);
6931 intermediateResults1_high_low_s_32x4 = vmlal_s16(intermediateResults1_high_low_s_32x4, source2_high_low_s_16x4, factorChannel12_128_s_16x4);
6932 intermediateResults1_high_high_s_32x4 = vmlal_s16(intermediateResults1_high_high_s_32x4, source2_high_high_s_16x4, factorChannel12_128_s_16x4);
6933 intermediateResults2_high_low_s_32x4 = vmlal_s16(intermediateResults2_high_low_s_32x4, source2_high_low_s_16x4, factorChannel22_128_s_16x4);
6934 intermediateResults2_high_high_s_32x4 = vmlal_s16(intermediateResults2_high_high_s_32x4, source2_high_high_s_16x4, factorChannel22_128_s_16x4);
6936 intermediateResults0_high_low_s_32x4 = vmlal_s16(intermediateResults0_high_low_s_32x4, source3_high_low_s_16x4, factorChannel03_128_s_16x4);
6937 intermediateResults0_high_high_s_32x4 = vmlal_s16(intermediateResults0_high_high_s_32x4, source3_high_high_s_16x4, factorChannel03_128_s_16x4);
6938 intermediateResults1_high_low_s_32x4 = vmlal_s16(intermediateResults1_high_low_s_32x4, source3_high_low_s_16x4, factorChannel13_128_s_16x4);
6939 intermediateResults1_high_high_s_32x4 = vmlal_s16(intermediateResults1_high_high_s_32x4, source3_high_high_s_16x4, factorChannel13_128_s_16x4);
6940 intermediateResults2_high_low_s_32x4 = vmlal_s16(intermediateResults2_high_low_s_32x4, source3_high_low_s_16x4, factorChannel23_128_s_16x4);
6941 intermediateResults2_high_high_s_32x4 = vmlal_s16(intermediateResults2_high_high_s_32x4, source3_high_high_s_16x4, factorChannel23_128_s_16x4);
6944 const int32x4_t biasChannel0_128_s_32x4 = vmovl_s16(vget_low_s16(biasChannel0_128_s_16x8));
6945 const int32x4_t biasChannel1_128_s_32x4 = vmovl_s16(vget_low_s16(biasChannel1_128_s_16x8));
6946 const int32x4_t biasChannel2_128_s_32x4 = vmovl_s16(vget_low_s16(biasChannel2_128_s_16x8));
6949 intermediateResults0_low_low_s_32x4 = vaddq_s32(intermediateResults0_low_low_s_32x4, biasChannel0_128_s_32x4);
6950 intermediateResults0_low_high_s_32x4 = vaddq_s32(intermediateResults0_low_high_s_32x4, biasChannel0_128_s_32x4);
6951 intermediateResults1_low_low_s_32x4 = vaddq_s32(intermediateResults1_low_low_s_32x4, biasChannel1_128_s_32x4);
6952 intermediateResults1_low_high_s_32x4 = vaddq_s32(intermediateResults1_low_high_s_32x4, biasChannel1_128_s_32x4);
6953 intermediateResults2_low_low_s_32x4 = vaddq_s32(intermediateResults2_low_low_s_32x4, biasChannel2_128_s_32x4);
6954 intermediateResults2_low_high_s_32x4 = vaddq_s32(intermediateResults2_low_high_s_32x4, biasChannel2_128_s_32x4);
6956 intermediateResults0_high_low_s_32x4 = vaddq_s32(intermediateResults0_high_low_s_32x4, biasChannel0_128_s_32x4);
6957 intermediateResults0_high_high_s_32x4 = vaddq_s32(intermediateResults0_high_high_s_32x4, biasChannel0_128_s_32x4);
6958 intermediateResults1_high_low_s_32x4 = vaddq_s32(intermediateResults1_high_low_s_32x4, biasChannel1_128_s_32x4);
6959 intermediateResults1_high_high_s_32x4 = vaddq_s32(intermediateResults1_high_high_s_32x4, biasChannel1_128_s_32x4);
6960 intermediateResults2_high_low_s_32x4 = vaddq_s32(intermediateResults2_high_low_s_32x4, biasChannel2_128_s_32x4);
6961 intermediateResults2_high_high_s_32x4 = vaddq_s32(intermediateResults2_high_high_s_32x4, biasChannel2_128_s_32x4);
6964 uint8x16x3_t results_u_8x16x3;
6968 results_u_8x16x3.val[0] = vcombine_u8(
6969 vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_low_low_s_32x4, 7), vqrshrun_n_s32(intermediateResults0_low_high_s_32x4, 7))),
6970 vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_high_low_s_32x4, 7), vqrshrun_n_s32(intermediateResults0_high_high_s_32x4, 7))));
6971 results_u_8x16x3.val[1] = vcombine_u8(
6972 vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_low_low_s_32x4, 7), vqrshrun_n_s32(intermediateResults1_low_high_s_32x4, 7))),
6973 vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_high_low_s_32x4, 7), vqrshrun_n_s32(intermediateResults1_high_high_s_32x4, 7))));
6974 results_u_8x16x3.val[2] = vcombine_u8(
6975 vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_low_low_s_32x4, 7), vqrshrun_n_s32(intermediateResults2_low_high_s_32x4, 7))),
6976 vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_high_low_s_32x4, 7), vqrshrun_n_s32(intermediateResults2_high_high_s_32x4, 7))));
6979 vst3q_u8(target, results_u_8x16x3);
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition FrameChannels.h:51
static bool premultipliedAlphaToStraightAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool zipChannels(const Frames &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool separateTo1Channel(const Frame &sourceFrame, Frames &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool premultipliedAlphaToStraightAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool separateTo1Channel(const Frame &sourceFrame, const std::initializer_list< Frame * > &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool zipChannels(const std::initializer_list< Frame > &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool straightAlphaToPremultipliedAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
static bool straightAlphaToPremultipliedAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
This class implements frame channel conversion, transformation and extraction functions.
Definition FrameChannels.h:31
static void reverseChannelOrder(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Reverses the order of the channels of a frame with zipped pixel format.
Definition FrameChannels.h:2964
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_1024_s_16x8, const __m128i &factorChannel10_1024_s_16x8, const __m128i &factorChannel20_1024_s_16x8, const __m128i &factorChannel01_1024_s_16x8, const __m128i &factorChannel11_1024_s_16x8, const __m128i &factorChannel21_1024_s_16x8, const __m128i &factorChannel02_1024_s_16x8, const __m128i &factorChannel12_1024_s_16x8, const __m128i &factorChannel22_1024_s_16x8, const __m128i &biasChannel0_1024_s_32x4, const __m128i &biasChannel1_1024_s_32x4, const __m128i &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5470
static void addChannelValueRow(const T *source, T *target, const size_t size, const void *channelValueParameter)
Adds a channel to a given row with generic (zipped) pixel format and sets all values to a specified v...
Definition FrameChannels.h:4416
static void shuffleRowChannelsAndSetLastChannelValue(const T *source, T *target, const size_t size, const void *options=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern and sets the last c...
Definition FrameChannels.h:3871
static OCEAN_FORCE_INLINE void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_64_s_16x8, const __m128i &factorChannel10_64_s_16x8, const __m128i &factorChannel20_64_s_16x8, const __m128i &factorChannel01_64_s_16x8, const __m128i &factorChannel11_64_s_16x8, const __m128i &factorChannel21_64_s_16x8, const __m128i &factorChannel02_64_s_16x8, const __m128i &factorChannel12_64_s_16x8, const __m128i &factorChannel22_64_s_16x8, const __m128i &biasChannel0_s_16x8, const __m128i &biasChannel1_s_16x8, const __m128i &biasChannel2_s_16x8, const __m128i &channelValue3_u_8x16)
Converts 16 pixels with 3 channels per pixel to 16 pixels with four channels per pixel by a linear com...
Definition FrameChannels.h:5683
static void separateTo1Channel(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition FrameChannels.h:1971
static OCEAN_FORCE_INLINE void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8, const uint8x16_t &channelValue3_u_8x16)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 4 channels per pixel by a linear combi...
Definition FrameChannels.h:6632
static void addChannelRow(const void **sources, void **targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void *options)
Adds a channel to a given row with generic (zipped) pixel format and copies the information of the ne...
Definition FrameChannels.h:4316
static void shuffleChannelsAndSetLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of source frame and sets the last channel with constant value in the target fra...
Definition FrameChannels.h:4034
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0_128_u_16x8, const __m128i &multiplicationFactors1_128_u_16x8, const __m128i &multiplicationFactors2_128_u_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition FrameChannels.h:5316
static void shuffleChannels(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of a frame by an arbitrary pattern.
Definition FrameChannels.h:4006
static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 3 channels to pixels with one channel by a linear combination of the fo...
Definition FrameChannels.h:5006
static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with one channel by a linear combination of the fo...
Definition FrameChannels.h:5079
static void copyChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Copies one channel from a given frame with zipped pixel format to another frame with zipped pixel for...
Definition FrameChannels.h:2923
static void zipChannels(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition FrameChannels.h:2722
static void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition FrameChannels.h:4219
static void applyRowOperator(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > &rowOperatorFunction, Worker *worker=nullptr)
Applies a row operator to all rows of a source image.
Definition FrameChannels.h:4134
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:6211
static void convertRow4ChannelsTo2Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *multiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with two channels by a linear combination of the fo...
static void setChannelSubset(T *frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Sets one channel of a frame with one unique value.
Definition FrameChannels.h:4615
static void applyBivariateOperatorSubset(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Generic bivariate pixel operations.
Definition FrameChannels.h:4850
static void applyAdvancedPixelModifier(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:4096
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition FrameChannels.h:6157
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear com...
Definition FrameChannels.h:6424
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8, const uint8x8_t &factorChannel3_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static void addFirstChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition FrameChannels.h:2835
static void addLastChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the ba...
Definition FrameChannels.h:2855
static void removeFirstChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the first channel from a given frame with zipped (generic) pixel format.
Definition FrameChannels.h:2891
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition FrameChannels.h:6333
static void addLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition FrameChannels.h:2871
static void convertRow3ChannelsTo3Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void reverseRowPixelOrderInPlace(T *data, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general) in place.
Definition FrameChannels.h:3141
static void applyRowOperatorSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows)
Applies a row operator to a subset of all rows of a source image.
Definition FrameChannels.h:4983
static void applyPixelModifier(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:4078
static void narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const size_t size, const void *unusedParameters=nullptr)
Narrows a row of pixels with 16 bit channels to pixels with 8 bit channels.
Definition FrameChannels.h:4257
static void applyAdvancedPixelModifierSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:4743
static void convertRow4ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 4 channels to pixels with 3 channels by a linear combination of the fou...
static OCEAN_FORCE_INLINE void convert4ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_128_s_16x8, const __m128i &factorChannel10_128_s_16x8, const __m128i &factorChannel20_128_s_16x8, const __m128i &factorChannel01_128_s_16x8, const __m128i &factorChannel11_128_s_16x8, const __m128i &factorChannel21_128_s_16x8, const __m128i &factorChannel02_128_s_16x8, const __m128i &factorChannel12_128_s_16x8, const __m128i &factorChannel22_128_s_16x8, const __m128i &factorChannel03_128_s_16x8, const __m128i &factorChannel13_128_s_16x8, const __m128i &factorChannel23_128_s_16x8, const __m128i &biasChannel0_s_16x8, const __m128i &biasChannel1_s_16x8, const __m128i &biasChannel2_s_16x8)
Converts 16 pixels with 4 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5763
static void shuffleRowChannels(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern.
Definition FrameChannels.h:3511
static void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition FrameChannels.h:4181
static void convertRow3ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 3 channels per pixel by a linear combi...
Definition FrameChannels.h:6550
static void convertRow3ChannelsTo4Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 4 channels by a linear combination of the thr...
static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
Definition of a constant to specify that the number of channels are not known at compile time but at ...
Definition FrameChannels.h:37
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition FrameChannels.h:6276
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_64_s_16x8, const __m128i &factorChannel10_64_s_16x8, const __m128i &factorChannel20_64_s_16x8, const __m128i &factorChannel01_64_s_16x8, const __m128i &factorChannel11_64_s_16x8, const __m128i &factorChannel21_64_s_16x8, const __m128i &factorChannel02_64_s_16x8, const __m128i &factorChannel12_64_s_16x8, const __m128i &factorChannel22_64_s_16x8, const __m128i &biasChannel0_s_16x8, const __m128i &biasChannel1_s_16x8, const __m128i &biasChannel2_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5607
static void copyChannelRow(const T *source, T *target, const size_t size, const void *unusedParameters=nullptr)
Copies one channel from a source row to a target row with generic (zipped) pixel format.
Definition FrameChannels.h:4455
static void reverseRowPixelOrder(const T *source, T *target, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general).
Definition FrameChannels.h:2980
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0123_128_s_32x)
Converts 16 pixels with 4 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition FrameChannels.h:5921
static void removeLastChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the last channel from a given frame with zipped (generic) pixel format.
Definition FrameChannels.h:2907
static void transformGeneric(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker)
Transforms a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24,...
Definition FrameChannels.h:4156
static void setChannel(T *frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker *worker=nullptr)
Sets one channel of a frame with a specific unique value.
Definition FrameChannels.h:2945
static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition FrameChannels.h:5243
static void narrow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Narrows 16 bit channels of a frame to 8 bit channels.
Definition FrameChannels.h:4062
static OCEAN_FORCE_INLINE void convert4ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &factorChannel03_128_s_16x8, const int16x8_t &factorChannel13_128_s_16x8, const int16x8_t &factorChannel23_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 16 pixels with 4 channels per pixel to 16 pixels with three channels per pixel by a linear c...
Definition FrameChannels.h:6815
static void transformGenericSubset(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const RowReversePixelOrderFunction< void > rowReversePixelOrderFunction, const unsigned int bytesPerRow, const unsigned int sourceStrideBytes, const unsigned int targetStrideBytes, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a frame with generic pixel format (with zipped pixel information) like RGB24 o...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_128_s_16x8, const __m128i &factorChannel10_128_s_16x8, const __m128i &factorChannel20_128_s_16x8, const __m128i &factorChannel01_128_s_16x8, const __m128i &factorChannel11_128_s_16x8, const __m128i &factorChannel21_128_s_16x8, const __m128i &factorChannel02_128_s_16x8, const __m128i &factorChannel12_128_s_16x8, const __m128i &factorChannel22_128_s_16x8, const __m128i &biasChannel0_s_16x8, const __m128i &biasChannel1_s_16x8, const __m128i &biasChannel2_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5382
static void reverseRowChannelOrder(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Reverses/mirrors the order of channels in a given row (or a memory block in general).
Definition FrameChannels.h:3319
static void convertRow3ChannelsTo3Channels8BitPerChannel10BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void applyBivariateOperator(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Generic bivariate pixel operations. Applies bivariate per-pixel operators: C(y, x) = op(A(y,...
Definition FrameChannels.h:4115
static void addFirstChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the fr...
Definition FrameChannels.h:2819
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel00_128_u_8x8, const uint8x8_t &factorChannel10_128_u_8x8, const uint8x8_t &factorChannel01_128_u_8x8, const uint8x8_t &factorChannel11_128_u_8x8, const uint8x8_t &factorChannel02_128_u_8x8, const uint8x8_t &factorChannel12_128_u_8x8, const uint8x8_t &factorChannel03_128_u_8x8, const uint8x8_t &factorChannel13_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with two channels per pixel by a linear combi...
Definition FrameChannels.h:6766
static void separateTo1ChannelRuntime(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition FrameChannels.h:4476
static void zipChannelsRuntime(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition FrameChannels.h:4549
void(*)(const TSource *sourceRow, TTarget *targetRow, const unsigned int width, const unsigned int height, unsigned int rowIndex, const unsigned int sourceStrideElements, const unsigned int targetStrideElements) RowOperatorFunction
Definition of a function pointer to a function able to operate on an entire image row.
Definition FrameChannels.h:43
static void applyPixelModifierSubset(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:4638
static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition FrameChannels.h:5154
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i &multiplicationFactorsChannel1_0123_128_s_16x8)
Converts 16 pixels with 4 channels per pixel to 16 pixels with two channels per pixel by a linear comb...
Definition FrameChannels.h:5984
This is the base class for all frame converter classes.
Definition FrameConverter.h:32
ConversionFlag
Definition of individual conversion flags.
Definition FrameConverter.h:39
@ CONVERT_NORMAL
Normal conversion, neither flips nor mirrors the image.
Definition FrameConverter.h:49
@ CONVERT_FLIPPED_AND_MIRRORED
Rotated conversion, rotates the image by 180.0 degrees with anchor in the center of the image.
Definition FrameConverter.h:82
@ CONVERT_MIRRORED
Mirrored conversion, exchanges left and right of the image (like in a mirror, mirroring around the y-...
Definition FrameConverter.h:71
@ CONVERT_FLIPPED
Flipped conversion, exchanges top and bottom of the image (flipping around the x-axis).
Definition FrameConverter.h:60
static void convertGenericPixelFormat(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const ConversionFlag flag, const RowConversionFunction< TSource, TTarget > rowConversionFunction, const RowReversePixelOrderInPlaceFunction< TTarget > targetReversePixelOrderInPlaceFunction, const bool areContinuous, const void *options, Worker *worker)
Converts a frame with generic pixel format (e.g., RGBA32, BGR24, YUV24, ...) to a frame with generic ...
Definition FrameConverter.h:3483
void(*)(T *row, const size_t width) RowReversePixelOrderInPlaceFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition FrameConverter.h:603
void(*)(const T *inputRow, T *targetRow, const size_t width) RowReversePixelOrderFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition FrameConverter.h:594
static void convertArbitraryPixelFormat(const void **sources, void **targets, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int multipleRowsPerIteration, const MultipleRowsConversionFunction multipleRowsConversionFunction, const void *options, Worker *worker)
Converts a frame with arbitrary pixel format (e.g., Y_UV12, Y_VU12, YUYV16, ...) to a frame with arbi...
Definition FrameConverter.h:3506
static OCEAN_FORCE_INLINE uint8x16_t cast16ElementsNEON(const float32x4_t &sourceA_f_32x4, const float32x4_t &sourceB_f_32x4, const float32x4_t &sourceC_f_32x4, const float32x4_t &sourceD_f_32x4)
Casts 16 float elements to 16 uint8_t elements.
Definition NEON.h:1216
static __m128i divideByRightShiftSigned32Bit(const __m128i &value, const unsigned int rightShifts)
Divides four signed 32 bit values by applying a right shift.
Definition SSE.h:3173
static __m128i load128i(const void *const buffer)
Loads a 128i value from the memory.
Definition SSE.h:3724
static void store128i(const __m128i &value, uint8_t *const buffer)
Stores a 128i value to the memory.
Definition SSE.h:3869
static __m128i divideByRightShiftSigned16Bit(const __m128i &value, const unsigned int rightShifts)
Divides eight int16_t values by applying a right shift.
Definition SSE.h:3104
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8AndAccumulate(const __m128i &values0, const __m128i &values1, __m128i &results0, __m128i &results1)
Multiplies 8 int16_t values with 8 int16_t values and adds the products to 8 int32_t values.
Definition SSE.h:4014
static OCEAN_FORCE_INLINE void interleave3Channel8Bit48Elements(const __m128i &channel0, const __m128i &channel1, const __m128i &channel2, __m128i &interleavedA, __m128i &interleavedB, __m128i &interleavedC)
Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition SSE.h:3410
static OCEAN_FORCE_INLINE void reverseChannelOrder3Channel8Bit48Elements(const __m128i &interleaved0, const __m128i &interleaved1, const __m128i &interleaved2, __m128i &reversedInterleaved0, __m128i &reversedInterleaved1, __m128i &reversedInterleaved2)
Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channe...
Definition SSE.h:3492
static __m128i removeHighBits16_8(const __m128i &value)
Removes the higher 8 bits of eight 16 bit elements.
Definition SSE.h:3904
static OCEAN_FORCE_INLINE void deInterleave3Channel8Bit48Elements(const __m128i &interleavedA, const __m128i &interleavedB, const __m128i &interleavedC, __m128i &channel0, __m128i &channel1, __m128i &channel2)
Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition SSE.h:3369
static __m128i set128i(const unsigned long long high64, const unsigned long long low64)
Sets a 128i value by two 64 bit values.
Definition SSE.h:3875
static OCEAN_FORCE_INLINE void reverseChannelOrder4Channel8Bit64Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (64 elements) of an image with 4 interleaved channels...
Definition SSE.h:3517
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8(const __m128i &values0, const __m128i &values1, __m128i &products0, __m128i &products1)
Multiplies 8 int16_t values with 8 int16_t values and returns the products as 8 int32_t results.
Definition SSE.h:4005
static OCEAN_FORCE_INLINE void reverseChannelOrder2Channel8Bit32Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (32 elements) of an image with 2 interleaved channels...
Definition SSE.h:3477
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition Caller.h:2877
This class implements Ocean's image class.
Definition Frame.h:1879
PixelFormat
Definition of all pixel formats available in the Ocean framework.
Definition Frame.h:183
typename TypeMapperBySize< sizeof(T)>::Type Type
Definition of an invalid mapped data type.
Definition DataType.h:508
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
std::vector< Index32 > Indices32
Definition of a vector holding 32 bit index values.
Definition Base.h:96
std::vector< Frame > Frames
Definition of a vector holding frames.
Definition Frame.h:1842
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Default definition of a type with tBytes bytes.
Definition DataType.h:32