8 #ifndef META_OCEAN_CV_FRAME_CHANNELS_H
9 #define META_OCEAN_CV_FRAME_CHANNELS_H
// Placeholder value (0) for a channel-count template parameter.
// In this file it is the default argument of the `tChannels` template parameter of
// separateTo1Channel() and zipChannels(); both functions additionally receive the
// channel count as the runtime parameter `channels`.
// NOTE(review): as the name says, 0 presumably selects the runtime (non-specialized) code path - confirm against the definitions.
37 static constexpr
unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME = 0u;
// Function pointer type for a callback operating on one source row and one target row.
// It is the type of the `rowOperatorFunction` parameter of applyRowOperator() and applyRowOperatorSubset().
//
// Template parameters:
//   TSource          element type of the (read-only) source row
//   TTarget          element type of the (writable) target row
//   tSourceChannels  not part of the function signature itself
//   tTargetChannels  not part of the function signature itself
//
// Callback parameters, in order:
//   sourceRow             pointer to the source row (const)
//   targetRow             pointer to the target row
//   width, height         frame dimensions
//   rowIndex              index of the row handed to the callback
//   sourceStrideElements  stride of the source frame, in elements
//   targetStrideElements  stride of the target frame, in elements
// The callback returns nothing.
// NOTE(review): the channel counts presumably describe the pixel layout of the two rows - confirm with an implementation of the callback.
42 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
43 using RowOperatorFunction = void(*)(
const TSource* sourceRow, TTarget* targetRow,
const unsigned int width,
const unsigned int height,
unsigned int rowIndex,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements);
// Splits a frame with interleaved channels into individual single-channel frames (declaration only).
//
// Template parameters:
//   TSource    element type of the source frame
//   TTarget    element type of the target frames
//   tChannels  number of channels if known at compile time; defaults to CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
//
// Parameters:
//   sourceFrame                  the interleaved source frame
//   targetFrames                 array of pointers to the target frames
//   width, height                frame dimensions in pixels
//   channels                     number of channels of the source frame
//   sourceFramePaddingElements   padding elements at the end of each source row
//   targetFramesPaddingElements  padding elements at the end of each target row
//
// The uint8_t specializations for 2, 3 and 4 channels (NEON builds) further down in this file
// assert that sourceFrame and targetFrames are non-null, that width and height are non-zero and
// that `channels` equals `tChannels`; they read one pointer and one padding value per channel and
// treat a nullptr `targetFramesPaddingElements` as zero padding for every target frame.
// NOTE(review): the generic definition is presumably bound by the same contract - confirm.
207 template <
typename TSource,
typename TTarget,
unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
208 static void separateTo1Channel(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements);
236 template <
typename TSource,
typename TTarget>
237 static void separateTo1Channel(
const TSource*
const sourceFrame,
const std::initializer_list<TTarget*>& targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int sourceFramePaddingElements,
const std::initializer_list<const unsigned int>& targetFramesPaddingElements);
265 template <
typename TSource,
typename TTarget,
unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
266 static void zipChannels(
const TSource*
const*
const sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
294 template <
typename TSource,
typename TTarget>
295 static void zipChannels(
const std::initializer_list<const TSource*>& sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const std::initializer_list<unsigned int>& sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
312 template <
typename T,
unsigned int tSourceChannels>
313 static inline void addFirstChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
329 template <
typename T,
unsigned int tSourceChannels>
330 static inline void addFirstChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
347 template <
typename T,
unsigned int tSourceChannels>
348 static inline void addLastChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
364 template <
typename T,
unsigned int tSourceChannels>
365 static inline void addLastChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
382 template <
typename T,
unsigned int tSourceChannels>
383 static inline void removeFirstChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
400 template <
typename T,
unsigned int tSourceChannels>
401 static inline void removeLastChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
418 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
419 static inline void copyChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
433 template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
434 static inline void setChannel(T* frame,
const unsigned int width,
const unsigned int height,
const T value,
const unsigned int framePaddingElements,
Worker* worker =
nullptr);
450 template <
typename T,
unsigned int tChannels>
451 static inline void reverseChannelOrder(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
477 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
478 static inline void shuffleChannels(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
505 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
506 static inline void shuffleChannelsAndSetLastChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
520 template <
unsigned int tChannels>
521 static inline void narrow16BitPerChannelTo8BitPerChannel(
const uint16_t* source, uint8_t* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
535 template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
536 static void applyPixelModifier(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
Worker* worker =
nullptr);
554 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
555 static void applyAdvancedPixelModifier(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
Worker* worker =
nullptr);
578 template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
579 static void applyBivariateOperator(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
Worker* worker =
nullptr);
598 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
599 static void applyRowOperator(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction,
Worker* worker =
nullptr);
615 template <
typename T,
unsigned int tChannels>
616 static inline void transformGeneric(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker);
629 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
630 static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t*
const frame,
const unsigned int width,
const unsigned int height,
const unsigned int framePaddingElements,
Worker* worker =
nullptr);
645 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
646 static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
659 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
660 static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t*
const frame,
const unsigned int width,
const unsigned int height,
const unsigned int framePaddingElements,
Worker* worker =
nullptr);
675 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
676 static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
686 template <
typename T,
unsigned int tChannels>
687 static void reverseRowPixelOrder(
const T* source, T* target,
const size_t size);
696 template <
typename T,
unsigned int tChannels>
697 static void reverseRowPixelOrderInPlace(T* data,
const size_t size);
708 template <
typename T,
unsigned int tChannels>
709 static void reverseRowChannelOrder(
const T* source, T* target,
const size_t size,
const void* unusedOptions =
nullptr);
731 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
732 static inline void shuffleRowChannels(
const T* source, T* target,
const size_t size,
const void* unusedOptions =
nullptr);
754 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
755 static inline void shuffleRowChannelsAndSetLastChannelValue(
const T* source, T* target,
const size_t size,
const void* options =
nullptr);
770 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
771 static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(
const uint8_t* source, uint8_t* target,
const size_t size,
const void* channelMultiplicationFactors_128);
872 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
873 static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(
const uint8_t* source, uint8_t* target,
const size_t size,
const void* channelMultiplicationFactors_128);
920 template <
unsigned int tChannels>
921 static void narrowRow16BitPerChannelTo8BitPerChannel(
const uint16_t* source, uint8_t* target,
const size_t size,
const void* unusedParameters =
nullptr);
937 template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
938 static void addChannelRow(
const void** sources,
void** targets,
const unsigned int multipleRowIndex,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const void* options);
951 template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
952 static void addChannelValueRow(
const T* source, T* target,
const size_t size,
const void* channelValueParameter);
966 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
967 static void copyChannelRow(
const T* source, T* target,
const size_t size,
const void* unusedParameters =
nullptr);
983 template <
typename TSource,
typename TTarget>
984 static void separateTo1ChannelRuntime(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements);
998 template <
typename TSource,
typename TTarget>
999 static void zipChannelsRuntime(
const TSource*
const*
const sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
1013 template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
1014 static void setChannelSubset(T* frame,
const unsigned int width,
const T value,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
1029 template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
1030 static void applyPixelModifierSubset(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows);
1049 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
1050 static void applyAdvancedPixelModifierSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows);
1073 template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
1074 static void applyBivariateOperatorSubset(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows);
1094 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
1095 static void applyRowOperatorSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction,
const unsigned int firstRow,
const unsigned int numberRows);
1111 static void transformGenericSubset(
const uint8_t* source, uint8_t* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const RowReversePixelOrderFunction<void> rowReversePixelOrderFunction,
const unsigned int bytesPerRow,
const unsigned int sourceStrideBytes,
const unsigned int targetStrideBytes,
const unsigned int firstRow,
const unsigned int numberRows);
1123 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1124 static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t*
const frame,
const unsigned int width,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
1138 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1139 static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
1151 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1152 static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t*
const frame,
const unsigned int width,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
1166 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1167 static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
1169 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1182 static OCEAN_FORCE_INLINE
void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& multiplicationFactors0_128_u_16x8,
const __m128i& multiplicationFactors1_128_u_16x8,
const __m128i& multiplicationFactors2_128_u_16x8);
1211 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_128_s_16x8,
const __m128i& factorChannel10_128_s_16x8,
const __m128i& factorChannel20_128_s_16x8,
const __m128i& factorChannel01_128_s_16x8,
const __m128i& factorChannel11_128_s_16x8,
const __m128i& factorChannel21_128_s_16x8,
const __m128i& factorChannel02_128_s_16x8,
const __m128i& factorChannel12_128_s_16x8,
const __m128i& factorChannel22_128_s_16x8,
const __m128i& biasChannel0_s_16x8,
const __m128i& biasChannel1_s_16x8,
const __m128i& biasChannel2_s_16x8);
1240 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_1024_s_16x8,
const __m128i& factorChannel10_1024_s_16x8,
const __m128i& factorChannel20_1024_s_16x8,
const __m128i& factorChannel01_1024_s_16x8,
const __m128i& factorChannel11_1024_s_16x8,
const __m128i& factorChannel21_1024_s_16x8,
const __m128i& factorChannel02_1024_s_16x8,
const __m128i& factorChannel12_1024_s_16x8,
const __m128i& factorChannel22_1024_s_16x8,
const __m128i& biasChannel0_1024_s_32x4,
const __m128i& biasChannel1_1024_s_32x4,
const __m128i& biasChannel2_1024_s_32x4);
1251 static OCEAN_FORCE_INLINE
void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& multiplicationFactors0123_128_s_32x);
1263 static OCEAN_FORCE_INLINE
void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& multiplicationFactorsChannel0_0123_128_s_16x8,
const __m128i& multiplicationFactorsChannel1_0123_128_s_16x8);
1267 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1283 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
1313 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8);
1342 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8);
1371 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8);
1400 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4);
1429 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4);
1458 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8);
1490 static OCEAN_FORCE_INLINE
void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8,
const uint8x16_t& channelValue3_u_8x16);
1508 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
1527 static OCEAN_FORCE_INLINE
void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const uint8x8_t& factorChannel00_128_u_8x8,
const uint8x8_t& factorChannel10_128_u_8x8,
const uint8x8_t& factorChannel01_128_u_8x8,
const uint8x8_t& factorChannel11_128_u_8x8,
const uint8x8_t& factorChannel02_128_u_8x8,
const uint8x8_t& factorChannel12_128_u_8x8,
const uint8x8_t& factorChannel03_128_u_8x8,
const uint8x8_t& factorChannel13_128_u_8x8);
1533 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1536 inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 2u>(
const uint8_t*
const sourceFrame, uint8_t*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
1538 ocean_assert(sourceFrame !=
nullptr);
1539 ocean_assert(targetFrames !=
nullptr);
1541 ocean_assert(width != 0u && height != 0u);
1542 ocean_assert(channels == 2u);
1544 constexpr
unsigned int tChannels = 2u;
1546 bool allTargetFramesContinuous =
true;
1548 if (targetFramesPaddingElements !=
nullptr)
1550 for (
unsigned int n = 0u; n < tChannels; ++n)
1552 if (targetFramesPaddingElements[n] != 0u)
1554 allTargetFramesContinuous =
false;
1560 const uint8_t* source = sourceFrame;
1561 uint8_t* target0 = targetFrames[0];
1562 uint8_t* target1 = targetFrames[1];
1564 constexpr
unsigned int tBlockSize = 16u;
1566 uint8x16x2_t source_8x16x2;
1568 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1570 const unsigned int pixels = width * height;
1571 const unsigned int blocks = pixels / tBlockSize;
1572 const unsigned int remaining = pixels % tBlockSize;
1574 for (
unsigned int n = 0u; n < blocks; ++n)
1576 source_8x16x2 = vld2q_u8(source);
1578 vst1q_u8(target0, source_8x16x2.val[0]);
1579 vst1q_u8(target1, source_8x16x2.val[1]);
1581 source += tBlockSize * tChannels;
1583 target0 += tBlockSize;
1584 target1 += tBlockSize;
1587 for (
unsigned int n = 0u; n < remaining; ++n)
1589 target0[n] = source[n * tChannels + 0u];
1590 target1[n] = source[n * tChannels + 1u];
1595 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
1596 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
1598 const unsigned int blocks = width / tBlockSize;
1599 const unsigned int remaining = width % tBlockSize;
1601 for (
unsigned int y = 0u; y < height; ++y)
1603 for (
unsigned int n = 0u; n < blocks; ++n)
1605 source_8x16x2 = vld2q_u8(source);
1607 vst1q_u8(target0, source_8x16x2.val[0]);
1608 vst1q_u8(target1, source_8x16x2.val[1]);
1610 source += tBlockSize * tChannels;
1612 target0 += tBlockSize;
1613 target1 += tBlockSize;
1616 for (
unsigned int n = 0u; n < remaining; ++n)
1618 target0[n] = source[n * tChannels + 0u];
1619 target1[n] = source[n * tChannels + 1u];
1622 source += remaining * tChannels + sourceFramePaddingElements;
1623 target0 += remaining + targetFrame0PaddingElements;
1624 target1 += remaining + targetFrame1PaddingElements;
1630 inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 3u>(
const uint8_t*
const sourceFrame, uint8_t*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
1632 ocean_assert(sourceFrame !=
nullptr);
1633 ocean_assert(targetFrames !=
nullptr);
1635 ocean_assert(width != 0u && height != 0u);
1636 ocean_assert(channels == 3u);
1638 constexpr
unsigned int tChannels = 3u;
1640 bool allTargetFramesContinuous =
true;
1642 if (targetFramesPaddingElements !=
nullptr)
1644 for (
unsigned int n = 0u; n < tChannels; ++n)
1646 if (targetFramesPaddingElements[n] != 0u)
1648 allTargetFramesContinuous =
false;
1654 const uint8_t* source = sourceFrame;
1655 uint8_t* target0 = targetFrames[0];
1656 uint8_t* target1 = targetFrames[1];
1657 uint8_t* target2 = targetFrames[2];
1659 constexpr
unsigned int tBlockSize = 16u;
1661 uint8x16x3_t source_8x16x3;
1663 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1665 const unsigned int pixels = width * height;
1666 const unsigned int blocks = pixels / tBlockSize;
1667 const unsigned int remaining = pixels % tBlockSize;
1669 for (
unsigned int n = 0u; n < blocks; ++n)
1671 source_8x16x3 = vld3q_u8(source);
1673 vst1q_u8(target0, source_8x16x3.val[0]);
1674 vst1q_u8(target1, source_8x16x3.val[1]);
1675 vst1q_u8(target2, source_8x16x3.val[2]);
1677 source += tBlockSize * tChannels;
1679 target0 += tBlockSize;
1680 target1 += tBlockSize;
1681 target2 += tBlockSize;
1684 for (
unsigned int n = 0u; n < remaining; ++n)
1686 target0[n] = source[n * tChannels + 0u];
1687 target1[n] = source[n * tChannels + 1u];
1688 target2[n] = source[n * tChannels + 2u];
1693 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
1694 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
1695 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[2];
1697 const unsigned int blocks = width / tBlockSize;
1698 const unsigned int remaining = width % tBlockSize;
1700 for (
unsigned int y = 0u; y < height; ++y)
1702 for (
unsigned int n = 0u; n < blocks; ++n)
1704 source_8x16x3 = vld3q_u8(source);
1706 vst1q_u8(target0, source_8x16x3.val[0]);
1707 vst1q_u8(target1, source_8x16x3.val[1]);
1708 vst1q_u8(target2, source_8x16x3.val[2]);
1710 source += tBlockSize * tChannels;
1712 target0 += tBlockSize;
1713 target1 += tBlockSize;
1714 target2 += tBlockSize;
1717 for (
unsigned int n = 0u; n < remaining; ++n)
1719 target0[n] = source[n * tChannels + 0u];
1720 target1[n] = source[n * tChannels + 1u];
1721 target2[n] = source[n * tChannels + 2u];
1724 source += remaining * tChannels + sourceFramePaddingElements;
1725 target0 += remaining + targetFrame0PaddingElements;
1726 target1 += remaining + targetFrame1PaddingElements;
1727 target2 += remaining + targetFrame2PaddingElements;
// Specialization for interleaved 4-channel uint8_t frames: splits the source into four separate
// 1-channel uint8_t planes using NEON intrinsics (vld4q_u8 / vst1q_u8), 16 pixels per iteration.
//  - sourceFrame: interleaved source pixels, must not be nullptr
//  - targetFrames: array of four target plane pointers, must not be nullptr
//  - width, height: frame dimensions in pixels, both must be non-zero
//  - channels: must be 4 (asserted)
//  - sourceFramePaddingElements: number of elements skipped at the end of each source row
//  - targetFramesPaddingElements: optional per-plane row paddings; nullptr is treated as all zero
1733 inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 4u>(
const uint8_t*
const sourceFrame, uint8_t*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
1735 ocean_assert(sourceFrame !=
nullptr);
1736 ocean_assert(targetFrames !=
nullptr);
1738 ocean_assert(width != 0u && height != 0u);
1739 ocean_assert(channels == 4u);
1741 constexpr
unsigned int tChannels = 4u;
// Determine whether every target plane is free of row padding.
1743 bool allTargetFramesContinuous =
true;
1745 if (targetFramesPaddingElements !=
nullptr)
1747 for (
unsigned int n = 0u; n < tChannels; ++n)
1749 if (targetFramesPaddingElements[n] != 0u)
1751 allTargetFramesContinuous =
false;
// Running read cursor for the source and write cursors for the four target planes.
1757 const uint8_t* source = sourceFrame;
1758 uint8_t* target0 = targetFrames[0];
1759 uint8_t* target1 = targetFrames[1];
1760 uint8_t* target2 = targetFrames[2];
1761 uint8_t* target3 = targetFrames[3];
// Number of pixels handled by one NEON load/store iteration.
1763 constexpr
unsigned int tBlockSize = 16u;
1765 uint8x16x4_t source_8x16x4;
// Fast path: no padding anywhere, so the entire frame is processed as one run of width * height pixels.
1767 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1769 const unsigned int pixels = width * height;
1770 const unsigned int blocks = pixels / tBlockSize;
1771 const unsigned int remaining = pixels % tBlockSize;
1773 for (
unsigned int n = 0u; n < blocks; ++n)
// De-interleave 16 pixels (64 bytes) into four 16-byte vectors, one per channel.
1775 source_8x16x4 = vld4q_u8(source);
1777 vst1q_u8(target0, source_8x16x4.val[0]);
1778 vst1q_u8(target1, source_8x16x4.val[1]);
1779 vst1q_u8(target2, source_8x16x4.val[2]);
1780 vst1q_u8(target3, source_8x16x4.val[3]);
1782 source += tBlockSize * tChannels;
1784 target0 += tBlockSize;
1785 target1 += tBlockSize;
1786 target2 += tBlockSize;
1787 target3 += tBlockSize;
// Scalar handling of the pixels left over after the last full 16-pixel block.
1790 for (
unsigned int n = 0u; n < remaining; ++n)
1792 target0[n] = source[n * tChannels + 0u];
1793 target1[n] = source[n * tChannels + 1u];
1794 target2[n] = source[n * tChannels + 2u];
1795 target3[n] = source[n * tChannels + 3u];
// Row-wise processing for frames with padding (presumably the alternative branch of the check above;
// the branch keyword is not visible in this excerpt).
1800 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
1801 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
1802 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[2];
1803 const unsigned int targetFrame3PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[3];
// Block and remainder counts are now computed per row.
1805 const unsigned int blocks = width / tBlockSize;
1806 const unsigned int remaining = width % tBlockSize;
1808 for (
unsigned int y = 0u; y < height; ++y)
1810 for (
unsigned int n = 0u; n < blocks; ++n)
1812 source_8x16x4 = vld4q_u8(source);
1814 vst1q_u8(target0, source_8x16x4.val[0]);
1815 vst1q_u8(target1, source_8x16x4.val[1]);
1816 vst1q_u8(target2, source_8x16x4.val[2]);
1817 vst1q_u8(target3, source_8x16x4.val[3]);
1819 source += tBlockSize * tChannels;
1821 target0 += tBlockSize;
1822 target1 += tBlockSize;
1823 target2 += tBlockSize;
1824 target3 += tBlockSize;
// Scalar handling of the row's trailing pixels; the cursors are not advanced inside this loop.
1827 for (
unsigned int n = 0u; n < remaining; ++n)
1829 target0[n] = source[n * tChannels + 0u];
1830 target1[n] = source[n * tChannels + 1u];
1831 target2[n] = source[n * tChannels + 2u];
1832 target3[n] = source[n * tChannels + 3u];
// Skip the scalar remainder plus the row padding to reach the start of the next row.
1835 source += remaining * tChannels + sourceFramePaddingElements;
1836 target0 += remaining + targetFrame0PaddingElements;
1837 target1 += remaining + targetFrame1PaddingElements;
1838 target2 += remaining + targetFrame2PaddingElements;
1839 target3 += remaining + targetFrame3PaddingElements;
// Generic implementation: splits an interleaved frame with tChannels channels into tChannels separate
// 1-channel planes, converting every element from TSource to TTarget.
//  - sourceFrame: interleaved source pixels, must not be nullptr
//  - targetFrames: array of target plane pointers, must not be nullptr
//  - width, height: frame dimensions in pixels, both must be non-zero
//  - channels: channel count, forwarded to the runtime variant
//  - sourceFramePaddingElements: number of elements skipped at the end of each source row
//  - targetFramesPaddingElements: optional per-plane row paddings, may be nullptr
1846 template <
typename TSource,
typename TTarget,
unsigned int tChannels>
1847 void FrameChannels::separateTo1Channel(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
1849 ocean_assert(sourceFrame !=
nullptr);
1850 ocean_assert(targetFrames !=
nullptr);
1852 ocean_assert(width != 0u && height != 0u);
// NOTE(review): the condition guarding this runtime fallback is not visible in this excerpt;
// presumably it is taken only when tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME - confirm.
1858 separateTo1ChannelRuntime<TSource, TTarget>(sourceFrame, targetFrames, width, height, channels, sourceFramePaddingElements, targetFramesPaddingElements);
// Every target plane pointer must be valid.
1863 for (
unsigned int c = 0u; c < tChannels; ++c)
1865 ocean_assert(targetFrames[c] !=
nullptr);
// Case 1: the source has no padding and no target padding array was given; all pixels are visited linearly.
1869 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements ==
nullptr)
1871 for (
unsigned int n = 0u; n < width * height; ++n)
1873 for (
unsigned int c = 0u; c < tChannels; ++c)
1875 targetFrames[c][n] = TTarget(sourceFrame[n * tChannels + c]);
// Case 2: only the source has row padding; the target planes are addressed as tightly packed rows.
1879 else if (targetFramesPaddingElements ==
nullptr)
1881 ocean_assert(sourceFramePaddingElements != 0u);
1883 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
1885 for (
unsigned int y = 0u; y < height; ++y)
1887 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
// Offset of row y inside a target plane without padding.
1889 const unsigned int targetRowOffset = y * width;
1891 for (
unsigned int x = 0u; x < width; ++x)
1893 for (
unsigned int c = 0u; c < tChannels; ++c)
1895 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * tChannels + c));
// Remaining case (presumably the final else branch; the keyword is not visible in this excerpt):
// a target padding array is provided, so every plane gets its own row stride.
1902 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
1904 Indices32 targetFrameStrideElements(tChannels);
1906 for (
unsigned int c = 0u; c < tChannels; ++c)
1908 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
1911 for (
unsigned int y = 0u; y < height; ++y)
1913 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
1915 for (
unsigned int x = 0u; x < width; ++x)
1917 for (
unsigned int c = 0u; c < tChannels; ++c)
1919 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * tChannels + c));
// Convenience overload that takes the target planes and their paddings as initializer lists.
// The number of target planes determines the channel count: 2, 3, and 4 planes are dispatched to the
// corresponding compile-time channel variants, while the last call uses CHANNELS_NOT_KNOWN_AT_COMPILE_TIME.
//  - targetFrames: at least one target plane pointer
//  - targetFramesPaddingElements: either empty or one entry per target plane; an empty list is
//    forwarded as nullptr
1926 template <
typename TSource,
typename TTarget>
1927 void FrameChannels::separateTo1Channel(
const TSource*
const sourceFrame,
const std::initializer_list<TTarget*>& targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int sourceFramePaddingElements,
const std::initializer_list<const unsigned int>& targetFramesPaddingElements)
1929 ocean_assert(targetFrames.size() >= 1);
1930 ocean_assert(targetFramesPaddingElements.size() == 0 || targetFrames.size() == targetFramesPaddingElements.size());
1932 if (targetFrames.size() == 2)
1934 separateTo1Channel<TSource, TTarget, 2u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
1936 else if (targetFrames.size() == 3)
1938 separateTo1Channel<TSource, TTarget, 3u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
1940 else if (targetFrames.size() == 4)
1942 separateTo1Channel<TSource, TTarget, 4u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
// Any other plane count (presumably the final else branch; the keyword is not visible in this excerpt).
1946 separateTo1Channel<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
1950 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// Specialization for two uint8_t planes: interleaves them into one 2-channel uint8_t frame using
// NEON intrinsics (vld1q_u8 / vst2q_u8), 16 pixels per iteration.
//  - sourceFrames: array of two source plane pointers, must not be nullptr
//  - targetFrame: interleaved target frame, must not be nullptr
//  - width, height: frame dimensions in pixels, both must be non-zero
//  - channels: must be 2 (asserted)
//  - sourceFramesPaddingElements: optional per-plane row paddings; nullptr is treated as all zero
//  - targetFramePaddingElements: number of elements skipped at the end of each target row
1953 inline void FrameChannels::zipChannels<uint8_t, uint8_t, 2u>(
const uint8_t*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
1955 ocean_assert(sourceFrames !=
nullptr);
1956 ocean_assert(targetFrame !=
nullptr);
1958 ocean_assert(width != 0u && height != 0u);
1959 ocean_assert(channels == 2u);
1961 constexpr
unsigned int tChannels = 2u;
// Determine whether every source plane is free of row padding.
1963 bool allSourceFramesContinuous =
true;
1965 if (sourceFramesPaddingElements !=
nullptr)
1967 for (
unsigned int n = 0u; n < tChannels; ++n)
1969 if (sourceFramesPaddingElements[n] != 0u)
1971 allSourceFramesContinuous =
false;
// Running read cursors for the two source planes and the write cursor for the target.
1977 const uint8_t* source0 = sourceFrames[0];
1978 const uint8_t* source1 = sourceFrames[1];
1979 uint8_t* target = targetFrame;
// Number of pixels handled by one NEON load/store iteration.
1981 constexpr
unsigned int tBlockSize = 16u;
1983 uint8x16x2_t source_8x16x2;
// Fast path: no padding anywhere, so the entire frame is processed as one run of width * height pixels.
1985 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1987 const unsigned int pixels = width * height;
1988 const unsigned int blocks = pixels / tBlockSize;
1989 const unsigned int remaining = pixels % tBlockSize;
1991 for (
unsigned int n = 0u; n < blocks; ++n)
// Load 16 values from each plane and store them interleaved (32 bytes).
1993 source_8x16x2.val[0] = vld1q_u8(source0);
1994 source_8x16x2.val[1] = vld1q_u8(source1);
1996 vst2q_u8(target, source_8x16x2);
1998 source0 += tBlockSize;
1999 source1 += tBlockSize;
2001 target += tBlockSize * tChannels;
// Scalar handling of the pixels left over after the last full 16-pixel block.
2004 for (
unsigned int n = 0u; n < remaining; ++n)
2006 target[n * tChannels + 0u] = source0[n];
2007 target[n * tChannels + 1u] = source1[n];
// Row-wise processing for frames with padding (presumably the alternative branch of the check above;
// the branch keyword is not visible in this excerpt).
2012 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2013 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
// Block and remainder counts are now computed per row.
2015 const unsigned int blocks = width / tBlockSize;
2016 const unsigned int remaining = width % tBlockSize;
2018 for (
unsigned int y = 0u; y < height; ++y)
2020 for (
unsigned int n = 0u; n < blocks; ++n)
2022 source_8x16x2.val[0] = vld1q_u8(source0);
2023 source_8x16x2.val[1] = vld1q_u8(source1);
2025 vst2q_u8(target, source_8x16x2);
2027 source0 += tBlockSize;
2028 source1 += tBlockSize;
2030 target += tBlockSize * tChannels;
// Scalar handling of the row's trailing pixels; the cursors are not advanced inside this loop.
2033 for (
unsigned int n = 0u; n < remaining; ++n)
2035 target[n * tChannels + 0u] = source0[n];
2036 target[n * tChannels + 1u] = source1[n];
// Skip the scalar remainder plus the row padding to reach the start of the next row.
2039 source0 += remaining + sourceFrame0PaddingElements;
2040 source1 += remaining + sourceFrame1PaddingElements;
2041 target += remaining * tChannels + targetFramePaddingElements;
// Specialization for three uint8_t planes: interleaves them into one 3-channel uint8_t frame using
// NEON intrinsics (vld1q_u8 / vst3q_u8), 16 pixels per iteration.
//  - sourceFrames: array of three source plane pointers, must not be nullptr
//  - targetFrame: interleaved target frame, must not be nullptr
//  - width, height: frame dimensions in pixels, both must be non-zero
//  - channels: must be 3 (asserted)
//  - sourceFramesPaddingElements: optional per-plane row paddings; nullptr is treated as all zero
//  - targetFramePaddingElements: number of elements skipped at the end of each target row
2047 inline void FrameChannels::zipChannels<uint8_t, uint8_t, 3u>(
const uint8_t*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2049 ocean_assert(sourceFrames !=
nullptr);
2050 ocean_assert(targetFrame !=
nullptr);
2052 ocean_assert(width != 0u && height != 0u);
2053 ocean_assert(channels == 3u);
2055 constexpr
unsigned int tChannels = 3u;
// Determine whether every source plane is free of row padding.
2057 bool allSourceFramesContinuous =
true;
2059 if (sourceFramesPaddingElements !=
nullptr)
2061 for (
unsigned int n = 0u; n < tChannels; ++n)
2063 if (sourceFramesPaddingElements[n] != 0u)
2065 allSourceFramesContinuous =
false;
// Running read cursors for the three source planes and the write cursor for the target.
2071 const uint8_t* source0 = sourceFrames[0];
2072 const uint8_t* source1 = sourceFrames[1];
2073 const uint8_t* source2 = sourceFrames[2];
2074 uint8_t* target = targetFrame;
// Number of pixels handled by one NEON load/store iteration.
2076 constexpr
unsigned int tBlockSize = 16u;
2078 uint8x16x3_t source_8x16x3;
// Fast path: no padding anywhere, so the entire frame is processed as one run of width * height pixels.
2080 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2082 const unsigned int pixels = width * height;
2083 const unsigned int blocks = pixels / tBlockSize;
2084 const unsigned int remaining = pixels % tBlockSize;
2086 for (
unsigned int n = 0u; n < blocks; ++n)
// Load 16 values from each plane and store them interleaved (48 bytes).
2088 source_8x16x3.val[0] = vld1q_u8(source0);
2089 source_8x16x3.val[1] = vld1q_u8(source1);
2090 source_8x16x3.val[2] = vld1q_u8(source2);
2092 vst3q_u8(target, source_8x16x3);
2094 source0 += tBlockSize;
2095 source1 += tBlockSize;
2096 source2 += tBlockSize;
2098 target += tBlockSize * tChannels;
// Scalar handling of the pixels left over after the last full 16-pixel block.
2101 for (
unsigned int n = 0u; n < remaining; ++n)
2103 target[n * tChannels + 0u] = source0[n];
2104 target[n * tChannels + 1u] = source1[n];
2105 target[n * tChannels + 2u] = source2[n];
// Row-wise processing for frames with padding (presumably the alternative branch of the check above;
// the branch keyword is not visible in this excerpt).
2110 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2111 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2112 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
// Block and remainder counts are now computed per row.
2114 const unsigned int blocks = width / tBlockSize;
2115 const unsigned int remaining = width % tBlockSize;
2117 for (
unsigned int y = 0u; y < height; ++y)
2119 for (
unsigned int n = 0u; n < blocks; ++n)
2121 source_8x16x3.val[0] = vld1q_u8(source0);
2122 source_8x16x3.val[1] = vld1q_u8(source1);
2123 source_8x16x3.val[2] = vld1q_u8(source2);
2125 vst3q_u8(target, source_8x16x3);
2127 source0 += tBlockSize;
2128 source1 += tBlockSize;
2129 source2 += tBlockSize;
2131 target += tBlockSize * tChannels;
// Scalar handling of the row's trailing pixels; the cursors are not advanced inside this loop.
2134 for (
unsigned int n = 0u; n < remaining; ++n)
2136 target[n * tChannels + 0u] = source0[n];
2137 target[n * tChannels + 1u] = source1[n];
2138 target[n * tChannels + 2u] = source2[n];
// Skip the scalar remainder plus the row padding to reach the start of the next row.
2141 source0 += remaining + sourceFrame0PaddingElements;
2142 source1 += remaining + sourceFrame1PaddingElements;
2143 source2 += remaining + sourceFrame2PaddingElements;
2144 target += remaining * tChannels + targetFramePaddingElements;
// Specialization for four uint8_t planes: interleaves them into one 4-channel uint8_t frame using
// NEON intrinsics (vld1q_u8 / vst4q_u8), 16 pixels per iteration.
//  - sourceFrames: array of four source plane pointers, must not be nullptr
//  - targetFrame: interleaved target frame, must not be nullptr
//  - width, height: frame dimensions in pixels, both must be non-zero
//  - channels: must be 4 (asserted)
//  - sourceFramesPaddingElements: optional per-plane row paddings; nullptr is treated as all zero
//  - targetFramePaddingElements: number of elements skipped at the end of each target row
2150 inline void FrameChannels::zipChannels<uint8_t, uint8_t, 4u>(
const uint8_t*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2152 ocean_assert(sourceFrames !=
nullptr);
2153 ocean_assert(targetFrame !=
nullptr);
2155 ocean_assert(width != 0u && height != 0u);
2156 ocean_assert(channels == 4u);
2158 constexpr
unsigned int tChannels = 4u;
// Determine whether every source plane is free of row padding.
2160 bool allSourceFramesContinuous =
true;
2162 if (sourceFramesPaddingElements !=
nullptr)
2164 for (
unsigned int n = 0u; n < tChannels; ++n)
2166 if (sourceFramesPaddingElements[n] != 0u)
2168 allSourceFramesContinuous =
false;
// Running read cursors for the four source planes and the write cursor for the target.
2174 const uint8_t* source0 = sourceFrames[0];
2175 const uint8_t* source1 = sourceFrames[1];
2176 const uint8_t* source2 = sourceFrames[2];
2177 const uint8_t* source3 = sourceFrames[3];
2178 uint8_t* target = targetFrame;
// Number of pixels handled by one NEON load/store iteration.
2180 constexpr
unsigned int tBlockSize = 16u;
2182 uint8x16x4_t source_8x16x4;
// Fast path: no padding anywhere, so the entire frame is processed as one run of width * height pixels.
2184 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2186 const unsigned int pixels = width * height;
2187 const unsigned int blocks = pixels / tBlockSize;
2188 const unsigned int remaining = pixels % tBlockSize;
2190 for (
unsigned int n = 0u; n < blocks; ++n)
// Load 16 values from each plane and store them interleaved (64 bytes).
2192 source_8x16x4.val[0] = vld1q_u8(source0);
2193 source_8x16x4.val[1] = vld1q_u8(source1);
2194 source_8x16x4.val[2] = vld1q_u8(source2);
2195 source_8x16x4.val[3] = vld1q_u8(source3);
2197 vst4q_u8(target, source_8x16x4);
2199 source0 += tBlockSize;
2200 source1 += tBlockSize;
2201 source2 += tBlockSize;
2202 source3 += tBlockSize;
2204 target += tBlockSize * tChannels;
// Scalar handling of the pixels left over after the last full 16-pixel block.
2207 for (
unsigned int n = 0u; n < remaining; ++n)
2209 target[n * tChannels + 0u] = source0[n];
2210 target[n * tChannels + 1u] = source1[n];
2211 target[n * tChannels + 2u] = source2[n];
2212 target[n * tChannels + 3u] = source3[n];
// Row-wise processing for frames with padding (presumably the alternative branch of the check above;
// the branch keyword is not visible in this excerpt).
2217 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2218 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2219 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
2220 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[3];
// Block and remainder counts are now computed per row.
2222 const unsigned int blocks = width / tBlockSize;
2223 const unsigned int remaining = width % tBlockSize;
2225 for (
unsigned int y = 0u; y < height; ++y)
2227 for (
unsigned int n = 0u; n < blocks; ++n)
2229 source_8x16x4.val[0] = vld1q_u8(source0);
2230 source_8x16x4.val[1] = vld1q_u8(source1);
2231 source_8x16x4.val[2] = vld1q_u8(source2);
2232 source_8x16x4.val[3] = vld1q_u8(source3);
2234 vst4q_u8(target, source_8x16x4);
2236 source0 += tBlockSize;
2237 source1 += tBlockSize;
2238 source2 += tBlockSize;
2239 source3 += tBlockSize;
2241 target += tBlockSize * tChannels;
// Scalar handling of the row's trailing pixels; the cursors are not advanced inside this loop.
2244 for (
unsigned int n = 0u; n < remaining; ++n)
2246 target[n * tChannels + 0u] = source0[n];
2247 target[n * tChannels + 1u] = source1[n];
2248 target[n * tChannels + 2u] = source2[n];
2249 target[n * tChannels + 3u] = source3[n];
// Skip the scalar remainder plus the row padding to reach the start of the next row.
2252 source0 += remaining + sourceFrame0PaddingElements;
2253 source1 += remaining + sourceFrame1PaddingElements;
2254 source2 += remaining + sourceFrame2PaddingElements;
2255 source3 += remaining + sourceFrame3PaddingElements;
2256 target += remaining * tChannels + targetFramePaddingElements;
// Specialization for two float planes: interleaves them into one 2-channel uint8_t frame,
// writing 16 pixels per vector store (vst2q_u8) and converting the leftover pixels with scalar casts.
//  - sourceFrames: array of two float source plane pointers, must not be nullptr
//  - targetFrame: interleaved uint8_t target frame, must not be nullptr
//  - width, height: frame dimensions in pixels, both must be non-zero
//  - channels: must be 2 (asserted)
//  - sourceFramesPaddingElements: optional per-plane row paddings; nullptr is treated as all zero
//  - targetFramePaddingElements: number of elements skipped at the end of each target row
// The scalar paths assert that every source value lies in [0, 256) before it is cast to uint8_t.
2262 inline void FrameChannels::zipChannels<float, uint8_t, 2u>(
const float*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2264 ocean_assert(sourceFrames !=
nullptr);
2265 ocean_assert(targetFrame !=
nullptr);
2267 ocean_assert(width != 0u && height != 0u);
2268 ocean_assert(channels == 2u);
2270 constexpr
unsigned int tChannels = 2u;
// Determine whether every source plane is free of row padding.
2272 bool allSourceFramesContinuous =
true;
2274 if (sourceFramesPaddingElements !=
nullptr)
2276 for (
unsigned int n = 0u; n < tChannels; ++n)
2278 if (sourceFramesPaddingElements[n] != 0u)
2280 allSourceFramesContinuous =
false;
// Running read cursors for the two source planes and the write cursor for the target.
2286 const float* source0 = sourceFrames[0];
2287 const float* source1 = sourceFrames[1];
2288 uint8_t* target = targetFrame;
// Number of pixels handled by one vector store iteration.
2290 constexpr
unsigned int tBlockSize = 16u;
2292 uint8x16x2_t target_8x16x2;
// Fast path: no padding anywhere, so the entire frame is processed as one run of width * height pixels.
2294 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2296 const unsigned int pixels = width * height;
2297 const unsigned int blocks = pixels / tBlockSize;
2298 const unsigned int remaining = pixels % tBlockSize;
2300 for (
unsigned int n = 0u; n < blocks; ++n)
// NOTE(review): the statements that fill target_8x16x2 from source0/source1 are not visible in this
// excerpt - confirm the float-to-uint8 conversion against the full file.
2305 vst2q_u8(target, target_8x16x2);
2307 source0 += tBlockSize;
2308 source1 += tBlockSize;
2310 target += tBlockSize * tChannels;
// Scalar handling of the pixels left over after the last full 16-pixel block.
2313 for (
unsigned int n = 0u; n < remaining; ++n)
2315 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2316 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2318 target[n * tChannels + 0u] = uint8_t(source0[n]);
2319 target[n * tChannels + 1u] = uint8_t(source1[n]);
// Row-wise processing for frames with padding (presumably the alternative branch of the check above;
// the branch keyword is not visible in this excerpt).
2324 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2325 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
// Block and remainder counts are now computed per row.
2327 const unsigned int blocks = width / tBlockSize;
2328 const unsigned int remaining = width % tBlockSize;
2330 for (
unsigned int y = 0u; y < height; ++y)
2332 for (
unsigned int n = 0u; n < blocks; ++n)
// NOTE(review): as above, the conversion statements populating target_8x16x2 are not visible here.
2337 vst2q_u8(target, target_8x16x2);
2339 source0 += tBlockSize;
2340 source1 += tBlockSize;
2342 target += tBlockSize * tChannels;
// Scalar handling of the row's trailing pixels; the cursors are not advanced inside this loop.
2345 for (
unsigned int n = 0u; n < remaining; ++n)
2347 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2348 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2350 target[n * tChannels + 0u] = uint8_t(source0[n]);
2351 target[n * tChannels + 1u] = uint8_t(source1[n]);
// Skip the scalar remainder plus the row padding to reach the start of the next row.
2354 source0 += remaining + sourceFrame0PaddingElements;
2355 source1 += remaining + sourceFrame1PaddingElements;
2356 target += remaining * tChannels + targetFramePaddingElements;
// Specialization for three float planes: interleaves them into one 3-channel uint8_t frame,
// writing 16 pixels per vector store (vst3q_u8) and converting the leftover pixels with scalar casts.
//  - sourceFrames: array of three float source plane pointers, must not be nullptr
//  - targetFrame: interleaved uint8_t target frame, must not be nullptr
//  - width, height: frame dimensions in pixels, both must be non-zero
//  - channels: must be 3 (asserted)
//  - sourceFramesPaddingElements: optional per-plane row paddings; nullptr is treated as all zero
//  - targetFramePaddingElements: number of elements skipped at the end of each target row
// The scalar paths assert that every source value lies in [0, 256) before it is cast to uint8_t.
2362 inline void FrameChannels::zipChannels<float, uint8_t, 3u>(
const float*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2364 ocean_assert(sourceFrames !=
nullptr);
2365 ocean_assert(targetFrame !=
nullptr);
2367 ocean_assert(width != 0u && height != 0u);
2368 ocean_assert(channels == 3u);
2370 constexpr
unsigned int tChannels = 3u;
// Determine whether every source plane is free of row padding.
2372 bool allSourceFramesContinuous =
true;
2374 if (sourceFramesPaddingElements !=
nullptr)
2376 for (
unsigned int n = 0u; n < tChannels; ++n)
2378 if (sourceFramesPaddingElements[n] != 0u)
2380 allSourceFramesContinuous =
false;
// Running read cursors for the three source planes and the write cursor for the target.
2386 const float* source0 = sourceFrames[0];
2387 const float* source1 = sourceFrames[1];
2388 const float* source2 = sourceFrames[2];
2389 uint8_t* target = targetFrame;
// Number of pixels handled by one vector store iteration.
2391 constexpr
unsigned int tBlockSize = 16u;
2393 uint8x16x3_t target_8x16x3;
// Fast path: no padding anywhere, so the entire frame is processed as one run of width * height pixels.
2395 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2397 const unsigned int pixels = width * height;
2398 const unsigned int blocks = pixels / tBlockSize;
2399 const unsigned int remaining = pixels % tBlockSize;
2401 for (
unsigned int n = 0u; n < blocks; ++n)
// NOTE(review): the statements that fill target_8x16x3 from source0..source2 are not visible in this
// excerpt - confirm the float-to-uint8 conversion against the full file.
2407 vst3q_u8(target, target_8x16x3);
2409 source0 += tBlockSize;
2410 source1 += tBlockSize;
2411 source2 += tBlockSize;
2413 target += tBlockSize * tChannels;
// Scalar handling of the pixels left over after the last full 16-pixel block.
2416 for (
unsigned int n = 0u; n < remaining; ++n)
2418 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2419 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2420 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2422 target[n * tChannels + 0u] = uint8_t(source0[n]);
2423 target[n * tChannels + 1u] = uint8_t(source1[n]);
2424 target[n * tChannels + 2u] = uint8_t(source2[n]);
// Row-wise processing for frames with padding (presumably the alternative branch of the check above;
// the branch keyword is not visible in this excerpt).
2429 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2430 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2431 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
// Block and remainder counts are now computed per row.
2433 const unsigned int blocks = width / tBlockSize;
2434 const unsigned int remaining = width % tBlockSize;
2436 for (
unsigned int y = 0u; y < height; ++y)
2438 for (
unsigned int n = 0u; n < blocks; ++n)
// NOTE(review): as above, the conversion statements populating target_8x16x3 are not visible here.
2445 vst3q_u8(target, target_8x16x3);
2447 source0 += tBlockSize;
2448 source1 += tBlockSize;
2449 source2 += tBlockSize;
2451 target += tBlockSize * tChannels;
// Scalar handling of the row's trailing pixels; the cursors are not advanced inside this loop.
2454 for (
unsigned int n = 0u; n < remaining; ++n)
2456 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2457 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2458 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2460 target[n * tChannels + 0u] = uint8_t(source0[n]);
2461 target[n * tChannels + 1u] = uint8_t(source1[n]);
2462 target[n * tChannels + 2u] = uint8_t(source2[n]);
// Skip the scalar remainder plus the row padding to reach the start of the next row.
2465 source0 += remaining + sourceFrame0PaddingElements;
2466 source1 += remaining + sourceFrame1PaddingElements;
2467 source2 += remaining + sourceFrame2PaddingElements;
2468 target += remaining * tChannels + targetFramePaddingElements;
// Specialization for four float planes: interleaves them into one 4-channel uint8_t frame,
// writing 16 pixels per vector store (vst4q_u8) and converting the leftover pixels with scalar casts.
//  - sourceFrames: array of four float source plane pointers, must not be nullptr
//  - targetFrame: interleaved uint8_t target frame, must not be nullptr
//  - width, height: frame dimensions in pixels, both must be non-zero
//  - channels: must be 4 (asserted)
//  - sourceFramesPaddingElements: optional per-plane row paddings; nullptr is treated as all zero
//  - targetFramePaddingElements: number of elements skipped at the end of each target row
// The scalar paths assert that every source value lies in [0, 256) before it is cast to uint8_t.
2474 inline void FrameChannels::zipChannels<float, uint8_t, 4u>(
const float*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2476 ocean_assert(sourceFrames !=
nullptr);
2477 ocean_assert(targetFrame !=
nullptr);
2479 ocean_assert(width != 0u && height != 0u);
2480 ocean_assert(channels == 4u);
2482 constexpr
unsigned int tChannels = 4u;
// Determine whether every source plane is free of row padding.
2484 bool allSourceFramesContinuous =
true;
2486 if (sourceFramesPaddingElements !=
nullptr)
2488 for (
unsigned int n = 0u; n < tChannels; ++n)
2490 if (sourceFramesPaddingElements[n] != 0u)
2492 allSourceFramesContinuous =
false;
// Running read cursors for the four source planes and the write cursor for the target.
2498 const float* source0 = sourceFrames[0];
2499 const float* source1 = sourceFrames[1];
2500 const float* source2 = sourceFrames[2];
2501 const float* source3 = sourceFrames[3];
2502 uint8_t* target = targetFrame;
// Number of pixels handled by one vector store iteration.
2504 constexpr
unsigned int tBlockSize = 16u;
2506 uint8x16x4_t target_8x16x4;
// Fast path: no padding anywhere, so the entire frame is processed as one run of width * height pixels.
2508 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2510 const unsigned int pixels = width * height;
2511 const unsigned int blocks = pixels / tBlockSize;
2512 const unsigned int remaining = pixels % tBlockSize;
2514 for (
unsigned int n = 0u; n < blocks; ++n)
// NOTE(review): the statements that fill target_8x16x4 from source0..source3 are not visible in this
// excerpt - confirm the float-to-uint8 conversion against the full file.
2521 vst4q_u8(target, target_8x16x4);
2523 source0 += tBlockSize;
2524 source1 += tBlockSize;
2525 source2 += tBlockSize;
2526 source3 += tBlockSize;
2528 target += tBlockSize * tChannels;
// Scalar handling of the pixels left over after the last full 16-pixel block.
2531 for (
unsigned int n = 0u; n < remaining; ++n)
2533 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2534 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2535 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2536 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2538 target[n * tChannels + 0u] = uint8_t(source0[n]);
2539 target[n * tChannels + 1u] = uint8_t(source1[n]);
2540 target[n * tChannels + 2u] = uint8_t(source2[n]);
2541 target[n * tChannels + 3u] = uint8_t(source3[n]);
// Row-wise processing for frames with padding (presumably the alternative branch of the check above;
// the branch keyword is not visible in this excerpt).
2546 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2547 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2548 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
2549 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[3];
// Block and remainder counts are now computed per row.
2551 const unsigned int blocks = width / tBlockSize;
2552 const unsigned int remaining = width % tBlockSize;
2554 for (
unsigned int y = 0u; y < height; ++y)
2556 for (
unsigned int n = 0u; n < blocks; ++n)
// NOTE(review): as above, the conversion statements populating target_8x16x4 are not visible here.
2563 vst4q_u8(target, target_8x16x4);
2565 source0 += tBlockSize;
2566 source1 += tBlockSize;
2567 source2 += tBlockSize;
2568 source3 += tBlockSize;
2570 target += tBlockSize * tChannels;
// Scalar handling of the row's trailing pixels; the cursors are not advanced inside this loop.
2573 for (
unsigned int n = 0u; n < remaining; ++n)
2575 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2576 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2577 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2578 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2580 target[n * tChannels + 0u] = uint8_t(source0[n]);
2581 target[n * tChannels + 1u] = uint8_t(source1[n]);
2582 target[n * tChannels + 2u] = uint8_t(source2[n]);
2583 target[n * tChannels + 3u] = uint8_t(source3[n]);
// Skip the scalar remainder plus the row padding to reach the start of the next row.
2586 source0 += remaining + sourceFrame0PaddingElements;
2587 source1 += remaining + sourceFrame1PaddingElements;
2588 source2 += remaining + sourceFrame2PaddingElements;
2589 source3 += remaining + sourceFrame3PaddingElements;
2590 target += remaining * tChannels + targetFramePaddingElements;
// Generic implementation: interleaves tChannels separate 1-channel planes into one frame with
// tChannels channels, converting every element from TSource to TTarget.
//  - sourceFrames: array of source plane pointers, must not be nullptr
//  - targetFrame: interleaved target frame, must not be nullptr
//  - width, height: frame dimensions in pixels, both must be non-zero
//  - channels: channel count, forwarded to the runtime variant
//  - sourceFramesPaddingElements: optional per-plane row paddings, may be nullptr
//  - targetFramePaddingElements: number of elements skipped at the end of each target row
2597 template <
typename TSource,
typename TTarget,
unsigned int tChannels>
2598 void FrameChannels::zipChannels(
const TSource*
const* sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2600 ocean_assert(sourceFrames !=
nullptr);
2601 ocean_assert(targetFrame !=
nullptr);
2603 ocean_assert(width != 0u && height != 0u);
// NOTE(review): the condition guarding this runtime fallback is not visible in this excerpt;
// presumably it is taken only when tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME - confirm.
2609 zipChannelsRuntime<TSource, TTarget>(sourceFrames, targetFrame, width, height, channels, sourceFramesPaddingElements, targetFramePaddingElements);
// Determine whether every source plane is free of row padding.
2613 bool allSourceFramesContinuous =
true;
2615 if (sourceFramesPaddingElements !=
nullptr)
2617 for (
unsigned int n = 0u; n < tChannels; ++n)
2619 if (sourceFramesPaddingElements[n] != 0u)
2621 allSourceFramesContinuous =
false;
// Fast path: no padding anywhere, so all pixels are visited linearly.
2627 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2629 for (
unsigned int n = 0u; n < width * height; ++n)
2631 for (
unsigned int c = 0u; c < tChannels; ++c)
2633 targetFrame[n * tChannels + c] = TTarget(sourceFrames[c][n]);
// Padded case (presumably the else branch; the keyword is not visible in this excerpt):
// rows are addressed through explicit strides for the target and for every source plane.
2639 const unsigned int targetFrameStrideElements = width * tChannels + targetFramePaddingElements;
2641 Indices32 sourceFrameStrideElements(tChannels);
2643 for (
unsigned int c = 0u; c < tChannels; ++c)
// Without a padding array each source plane is tightly packed, so its stride equals the width.
2645 if (sourceFramesPaddingElements ==
nullptr)
2647 sourceFrameStrideElements[c] = width;
2651 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
2655 for (
unsigned int y = 0u; y < height; ++y)
2657 TTarget*
const targetRow = targetFrame + y * targetFrameStrideElements;
2659 for (
unsigned int x = 0u; x < width; ++x)
2661 for (
unsigned int c = 0u; c < tChannels; ++c)
2663 *(targetRow + x * tChannels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
// Convenience overload that takes the source planes and their paddings as initializer lists.
// The number of source planes determines the channel count: 2, 3, and 4 planes are dispatched to the
// corresponding compile-time channel variants, while the last call uses CHANNELS_NOT_KNOWN_AT_COMPILE_TIME.
//  - sourceFrames: at least one source plane pointer
//  - sourceFramePaddingElements: either empty or one entry per source plane; an empty list is
//    forwarded as nullptr
2670 template <
typename TSource,
typename TTarget>
2671 void FrameChannels::zipChannels(
const std::initializer_list<const TSource*>& sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const std::initializer_list<unsigned int>& sourceFramePaddingElements,
const unsigned int targetFramePaddingElements)
2673 ocean_assert(sourceFrames.size() >= 1);
2674 ocean_assert(sourceFramePaddingElements.size() == 0 || sourceFrames.size() == sourceFramePaddingElements.size());
2676 if (sourceFrames.size() == 2)
2678 zipChannels<TSource, TTarget, 2u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2680 else if (sourceFrames.size() == 3)
2682 zipChannels<TSource, TTarget, 3u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2684 else if (sourceFrames.size() == 4)
2686 zipChannels<TSource, TTarget, 4u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
// Any other plane count (presumably the final else branch; the keyword is not visible in this excerpt).
2690 zipChannels<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
// Combines a frame with tSourceChannels channels and a separate frame holding one new channel into
// the target frame, delegating the per-row work to addChannelRow<T, tSourceChannels, true>.
//  - source: frame with tSourceChannels channels, must not be nullptr and must differ from target
//  - sourceNewChannel: frame providing the additional channel, must not be nullptr
//  - target: resulting frame, must not be nullptr
//  - width, height: frame dimensions in pixels, both at least 1
//  - conversionFlag: forwarded unchanged to the frame converter
//  - sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements: row paddings,
//    handed to the row function through the options array
//  - worker: forwarded unchanged to the frame converter
// NOTE(review): the 'true' template argument presumably places the new channel in front of the
// existing channels - confirm against addChannelRow.
2694 template <
typename T,
unsigned int tSourceChannels>
2695 inline void FrameChannels::addFirstChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2697 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
2699 ocean_assert(source !=
nullptr && sourceNewChannel !=
nullptr && target !=
nullptr);
2700 ocean_assert(source != target);
2701 ocean_assert(width >= 1u && height >= 1u);
// Paddings in the order: source, new-channel source, target.
2703 const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
// The two input frames are passed as an array of untyped pointers.
2705 const void* sources[2] = {source, sourceNewChannel};
// NOTE(review): the literal 1u is presumably the number of rows handled per row-function call -
// confirm against FrameConverter::convertArbitraryPixelFormat.
2707 FrameConverter::convertArbitraryPixelFormat(sources, (
void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, true>, options, worker);
// Extends a frame with tSourceChannels channels by one channel filled with a constant value,
// delegating the per-row work to addChannelValueRow<T, tSourceChannels, true>.
//  - source: frame with tSourceChannels channels, must not be nullptr
//  - newChannelValue: value for the additional channel; its address is handed to the row function
//  - target: resulting frame with tSourceChannels + 1 channels, must not be nullptr
//  - width, height: frame dimensions in pixels, both at least 1
//  - conversionFlag: forwarded unchanged to the frame converter
//  - sourcePaddingElements, targetPaddingElements: row paddings used to compute the strides
//  - worker: forwarded unchanged to the frame converter
// NOTE(review): the 'true' template argument presumably places the new channel in front of the
// existing channels - confirm against addChannelValueRow.
2710 template <
typename T,
unsigned int tSourceChannels>
2711 inline void FrameChannels::addFirstChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2713 static_assert(tSourceChannels >= 1u,
"Invalid channel number!");
2715 ocean_assert(source !=
nullptr && target !=
nullptr);
2716 ocean_assert(width >= 1u && height >= 1u);
// The target has exactly one channel more than the source.
2718 const unsigned int targetChannels = tSourceChannels + 1u;
// Row strides in elements: payload plus padding.
2720 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2721 const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
// The constant channel value is passed to the row function as an untyped pointer.
2723 const void* channelValueParameter = (
const void*)(&newChannelValue);
// Both frames are continuous only if neither has row padding.
2725 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2727 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, true>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
// Combines a frame with tSourceChannels channels and a separate frame holding one new channel into
// the target frame, delegating the per-row work to addChannelRow<T, tSourceChannels, false>.
//  - source: frame with tSourceChannels channels, must not be nullptr and must differ from target
//  - sourceNewChannel: frame providing the additional channel, must not be nullptr
//  - target: resulting frame, must not be nullptr
//  - width, height: frame dimensions in pixels, both at least 1
//  - conversionFlag: forwarded unchanged to the frame converter
//  - sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements: row paddings,
//    handed to the row function through the options array
//  - worker: forwarded unchanged to the frame converter
// NOTE(review): the 'false' template argument presumably places the new channel behind the
// existing channels - confirm against addChannelRow.
2730 template <
typename T,
unsigned int tSourceChannels>
2731 inline void FrameChannels::addLastChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2733 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
2735 ocean_assert(source !=
nullptr && sourceNewChannel !=
nullptr && target !=
nullptr);
2736 ocean_assert(source != target);
2737 ocean_assert(width >= 1u && height >= 1u);
// Paddings in the order: source, new-channel source, target.
2739 const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
// The two input frames are passed as an array of untyped pointers.
2741 const void* sources[2] = {source, sourceNewChannel};
// NOTE(review): the literal 1u is presumably the number of rows handled per row-function call -
// confirm against FrameConverter::convertArbitraryPixelFormat.
2743 FrameConverter::convertArbitraryPixelFormat(sources, (
void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, false>, options, worker);
// Extends a frame with tSourceChannels channels by one channel filled with a constant value,
// delegating the per-row work to addChannelValueRow<T, tSourceChannels, false>.
//  - source: frame with tSourceChannels channels, must not be nullptr
//  - newChannelValue: value for the additional channel; its address is handed to the row function
//  - target: resulting frame with tSourceChannels + 1 channels, must not be nullptr
//  - width, height: frame dimensions in pixels, both at least 1
//  - conversionFlag: forwarded unchanged to the frame converter
//  - sourcePaddingElements, targetPaddingElements: row paddings used to compute the strides
//  - worker: forwarded unchanged to the frame converter
// NOTE(review): the 'false' template argument presumably places the new channel behind the
// existing channels - confirm against addChannelValueRow.
2746 template <
typename T,
unsigned int tSourceChannels>
2747 inline void FrameChannels::addLastChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2749 static_assert(tSourceChannels >= 1u,
"Invalid channel number!");
2751 ocean_assert(source !=
nullptr && target !=
nullptr);
2752 ocean_assert(width >= 1u && height >= 1u);
// The target has exactly one channel more than the source.
2754 const unsigned int targetChannels = tSourceChannels + 1u;
// Row strides in elements: payload plus padding.
2756 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2757 const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
// The constant channel value is passed to the row function as an untyped pointer.
2759 const void* channelValueParameter = (
const void*)(&newChannelValue);
// Both frames are continuous only if neither has row padding.
2761 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2763 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, false>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
// NOTE(review): the line holding this function's name and parameter list is not part of this listing.
// The shuffle pattern below skips source channel 0, so this is presumably the "remove first channel" function - confirm.
// The function forwards to FrameChannels::shuffleChannels<>() with (tSourceChannels - 1) target channels.
2766 template <
typename T,
unsigned int tSourceChannels>
// at least two source channels are required so that one channel remains, a 32 bit pattern can address at most 8 channels
2769 static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u,
"Invalid channel number!");
2771 ocean_assert(source !=
nullptr && target !=
nullptr);
2772 ocean_assert(width >= 1u && height >= 1u);
// one hexadecimal digit per target channel, lowest digit first: 1, 2, ..., 7, so that target channel i reads source channel i + 1
// (assuming shuffleChannels decodes the pattern like the row functions in this file: (pattern >> (i * 4)) & 0xF - TODO confirm)
2774 const unsigned int shufflePatternMax = 0x07654321u;
// keeps the lowest (tSourceChannels - 1) digits only, e.g., 0xF for 2 source channels, 0xFF for 3 source channels
2775 const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u);
// all unused upper digits become 0
2777 const unsigned int shufflePattern = shufflePatternMax & mask;
2779 FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
// NOTE(review): the line holding this function's name and parameter list is not part of this listing.
// The shuffle pattern below keeps the leading source channels and drops the last one, so this is presumably the "remove last channel" function - confirm.
// The function forwards to FrameChannels::shuffleChannels<>() with (tSourceChannels - 1) target channels.
2782 template <
typename T,
unsigned int tSourceChannels>
// at least two source channels are required so that one channel remains, a 32 bit pattern can address at most 8 channels
2785 static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u,
"Invalid channel number!");
2787 ocean_assert(source !=
nullptr && target !=
nullptr);
2788 ocean_assert(width >= 1u && height >= 1u);
// identity pattern, one hexadecimal digit per target channel, lowest digit first: 0, 1, ..., 7, so that target channel i reads source channel i
// (assuming shuffleChannels decodes the pattern like the row functions in this file: (pattern >> (i * 4)) & 0xF - TODO confirm)
2790 const unsigned int shufflePatternMax = 0x76543210u;
// keeps the lowest (tSourceChannels - 1) digits only, e.g., 0xF for 2 source channels, 0xFF for 3 source channels
2791 const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u);
// all unused upper digits become 0
2793 const unsigned int shufflePattern = shufflePatternMax & mask;
2795 FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
// Copies the channel with index tSourceChannelIndex of a source frame to the channel with index tTargetChannelIndex of a target frame.
// The row-wise work is delegated to FrameConverter::convertGenericPixelFormat<T>() with FrameChannels::copyChannelRow<> as row function,
// the conversion is always executed with CONVERT_NORMAL.
// T: element type, tSourceChannels/tTargetChannels: number of channels of both frames (each >= 1),
// tSourceChannelIndex/tTargetChannelIndex: channel indices, each must be smaller than the corresponding number of channels.
2798 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
2799 inline void FrameChannels::copyChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2801 static_assert(tSourceChannels >= 1u,
"Invalid number of channels!");
2802 static_assert(tTargetChannels >= 1u,
"Invalid number of channels!");
// both channel indices are verified at compile time
2804 static_assert(tSourceChannelIndex < tSourceChannels,
"Invalid channel index!");
2805 static_assert(tTargetChannelIndex < tTargetChannels,
"Invalid channel index!");
2807 ocean_assert(source !=
nullptr && target !=
nullptr);
2808 ocean_assert(width >= 1u && height >= 1u);
// number of elements between the starts of two consecutive rows
2810 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2811 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
// both frames are continuous only if neither of them has padding elements
2815 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// NOTE(review): 'reversePixelOrderRowInPlaceFunction' is defined on a line which is not part of this listing - verify its value against the full file
2817 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements,
CONVERT_NORMAL, FrameChannels::copyChannelRow<T, tSourceChannels, tTargetChannels, tSourceChannelIndex, tTargetChannelIndex>, reversePixelOrderRowInPlaceFunction, areContinuous,
nullptr, worker);
// Sets the channel with index tChannel of a frame with tChannels channels to 'value'.
// The work is done by setChannelSubset<T, tChannel, tChannels>(), either distributed via the worker or with one direct call covering the rows [0, height).
// NOTE(review): the condition selecting between both paths is on a line which is not part of this listing - verify against the full file.
2820 template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
2821 inline void FrameChannels::setChannel(T* frame,
const unsigned int width,
const unsigned int height,
const T value,
const unsigned int framePaddingElements,
Worker* worker)
2823 static_assert(tChannels >= 1u,
"Invalid channel number!");
// the channel index is verified at compile time
2824 static_assert(tChannel < tChannels,
"Invalid channel index!");
2826 ocean_assert(frame !=
nullptr);
2827 ocean_assert(width >= 1u && height >= 1u);
// worker path: the two trailing 0u arguments of createStatic() take the positions of the first row and the number of rows,
// presumably they are placeholders which the worker replaces for each sub-range of [0, height) - TODO confirm
2831 worker->
executeFunction(
Worker::Function::createStatic(&setChannelSubset<T, tChannel, tChannels>, frame, width, value, framePaddingElements, 0u, 0u), 0u, height);
// direct path: one call handling all rows
2835 setChannelSubset<T, tChannel, tChannels>(frame, width, value, framePaddingElements, 0u, height);
// NOTE(review): the line holding this function's name and parameter list is not part of this listing.
// The row function used below is reverseRowChannelOrder<>, so this is presumably the frame-level "reverse channel order" function - confirm.
// The function derives the strides of both frames and delegates the work to FrameConverter::convertGenericPixelFormat<T>().
2839 template <
typename T,
unsigned int tChannels>
2842 static_assert(tChannels >= 1u,
"Invalid channel number!");
2844 ocean_assert(source !=
nullptr && target !=
nullptr);
2845 ocean_assert(width >= 1u && height >= 1u);
// number of elements between the starts of two consecutive rows, source and target have the same number of channels
2847 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
2848 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
// the frames are always reported as not continuous, regardless of the padding values
2850 constexpr
bool areContinuous =
false;
// reverseRowChannelOrder handles one row, reverseRowPixelOrderInPlace is handed over as in-place row reversal function
2852 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::reverseRowChannelOrder<T, tChannels>, FrameChannels::reverseRowPixelOrderInPlace<T, tChannels>, areContinuous,
nullptr, worker);
// NOTE(review): the line holding this function's name and parameter list is not part of this listing, presumably this is reverseRowPixelOrder() - confirm.
// Reverses the order of the pixels of one row: the source is read front to back while the target is written back to front,
// the order of the channels within each pixel stays unchanged. 'size' is the number of pixels in the row.
// NOTE(review): the conditions selecting one of the four NEON loops (and several braces) are not part of this listing;
// judging from the bytes handled per iteration (16, 32, 48, 64) the loops presumably cover 1, 2, 3 and 4 channels of 8 bit elements - confirm.
2855 template <
typename T,
unsigned int tChannels>
2858 static_assert(tChannels >= 1u,
"Invalid channel number!");
2860 ocean_assert(source !=
nullptr && target !=
nullptr);
2861 ocean_assert(size >= 1);
// memory ranges of both rows, used by the assertions below only
2864 const T*
const debugSourceStart = source;
2865 const T*
const debugSourceEnd = debugSourceStart + size * tChannels;
2867 const T*
const debugTargetStart = target;
2868 const T*
const debugTargetEnd = debugTargetStart + size * tChannels;
// the target pointer starts one element behind the row and is moved backwards before each write
2872 target += size * tChannels;
2874 const T*
const sourceEnd = source + size * tChannels;
2876 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// each NEON iteration handles 16 pixels, the remaining pixels are handled by the scalar loop at the end
2880 const size_t blocks16 = size / size_t(16);
// variant loading 16 bytes per iteration
2886 for (
size_t n = 0; n < blocks16; ++n)
2888 target -= 16u * tChannels;
2890 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2891 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2893 const uint8x16_t source_u_8x16 = vld1q_u8((
const uint8_t*)(source));
// vrev64q_u8 reverses the bytes within each 64 bit half, swapping both halves afterwards completes the reversal of all 16 bytes
2894 uint8x16_t revSource_u_8x16 = vrev64q_u8(source_u_8x16);
2895 revSource_u_8x16 = vcombine_u8(vget_high_u8(revSource_u_8x16), vget_low_u8(revSource_u_8x16));
2897 vst1q_u8((uint8_t*)(target), revSource_u_8x16);
2899 source += 16u * tChannels;
// variant loading 32 bytes per iteration
2907 for (
size_t n = 0; n < blocks16; ++n)
2909 target -= 16u * tChannels;
2911 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2912 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2914 const uint8x16_t sourceA_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 0);
2915 const uint8x16_t sourceB_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 16);
// the reversal works on 16 bit units so that two neighboring bytes stay together
2917 const uint8x16_t revSourceA_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u8(sourceA_u_8x16)));
2918 const uint8x16_t revSourceB_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u8(sourceB_u_8x16)));
2920 const uint8x16_t targetA_u_8x16 = vcombine_u8(vget_high_u8(revSourceA_u_8x16), vget_low_u8(revSourceA_u_8x16));
2921 const uint8x16_t targetB_u_8x16 = vcombine_u8(vget_high_u8(revSourceB_u_8x16), vget_low_u8(revSourceB_u_8x16));
// the registers are stored in swapped order (B before A)
2923 vst1q_u8((uint8_t*)(target) + 0, targetB_u_8x16);
2924 vst1q_u8((uint8_t*)(target) + 16, targetA_u_8x16);
2926 source += 16u * tChannels;
// variant using the de-interleaving load vld3q_u8 (three planes with 16 bytes each)
2934 for (
size_t n = 0; n < blocks16; ++n)
2936 target -= 16u * tChannels;
2938 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2939 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2941 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)(source));
// each plane is reversed individually: both halves are reversed and combined in swapped order
2943 uint8x16x3_t revSource_u_8x16x3;
2944 revSource_u_8x16x3.val[0] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[0])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[0])));
2945 revSource_u_8x16x3.val[1] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[1])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[1])));
2946 revSource_u_8x16x3.val[2] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[2])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[2])));
// the interleaving store writes the three planes back
2948 vst3q_u8((uint8_t*)(target), revSource_u_8x16x3);
2950 source += 16u * tChannels;
// variant loading 64 bytes per iteration
2958 for (
size_t n = 0; n < blocks16; ++n)
2960 target -= 16u * tChannels;
2962 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2963 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2965 const uint8x16_t sourceA_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 0);
2966 const uint8x16_t sourceB_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 16);
2967 const uint8x16_t sourceC_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 32);
2968 const uint8x16_t sourceD_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 48);
// the reversal works on 32 bit units so that four neighboring bytes stay together
2970 const uint8x16_t revSourceA_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceA_u_8x16)));
2971 const uint8x16_t revSourceB_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceB_u_8x16)));
2972 const uint8x16_t revSourceC_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceC_u_8x16)));
2973 const uint8x16_t revSourceD_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceD_u_8x16)));
2975 const uint8x16_t targetA_u_8x16 = vcombine_u8(vget_high_u8(revSourceA_u_8x16), vget_low_u8(revSourceA_u_8x16));
2976 const uint8x16_t targetB_u_8x16 = vcombine_u8(vget_high_u8(revSourceB_u_8x16), vget_low_u8(revSourceB_u_8x16));
2977 const uint8x16_t targetC_u_8x16 = vcombine_u8(vget_high_u8(revSourceC_u_8x16), vget_low_u8(revSourceC_u_8x16));
2978 const uint8x16_t targetD_u_8x16 = vcombine_u8(vget_high_u8(revSourceD_u_8x16), vget_low_u8(revSourceD_u_8x16));
// the registers are stored in reversed order (D, C, B, A)
2980 vst1q_u8((uint8_t*)(target) + 0, targetD_u_8x16);
2981 vst1q_u8((uint8_t*)(target) + 16, targetC_u_8x16);
2982 vst1q_u8((uint8_t*)(target) + 32, targetB_u_8x16);
2983 vst1q_u8((uint8_t*)(target) + 48, targetA_u_8x16);
2985 source += 16u * tChannels;
// scalar loop for all pixels not handled above
2998 while (source != sourceEnd)
3000 ocean_assert(source < sourceEnd);
3002 for (
unsigned int n = 0u; n < tChannels; ++n)
3004 ocean_assert(source + tChannels - n - 1u >= debugSourceStart);
3005 ocean_assert(source + tChannels - n - 1u < debugSourceEnd);
3007 ocean_assert(target > debugTargetStart && target <= debugTargetEnd);
// the target is written backwards, therefore the last channel of the pixel is written first to keep the channel order
3009 *--target = source[tChannels - n - 1u];
3012 source += tChannels;
// NOTE(review): the line holding this function's name and parameter list is not part of this listing, presumably this is reverseRowPixelOrderInPlace() - confirm.
// Reverses the order of the pixels of one row in place by swapping pixels from the left end with pixels from the right end of the row.
// NOTE(review): the declarations of 'n' and 'PixelType', the conditions selecting one of the four NEON loops and the increment of 'n'
// in the final loop are on lines which are not part of this listing - verify against the full file.
3016 template <
typename T,
unsigned int tChannels>
3019 static_assert(tChannels >= 1u,
"Invalid channel number!");
3021 ocean_assert(data !=
nullptr);
3022 ocean_assert(size >= 1);
3028 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// each NEON iteration swaps 16 pixels from the left with 16 pixels from the right, so it consumes 32 pixels
3034 const size_t blocks32 = size / size_t(32);
// 'left' points to the first 16 pixels of the row, 'right' points to the last 16 pixels of the row
3036 uint8_t* left = (uint8_t*)(data);
3037 uint8_t* right = (uint8_t*)(data) + (size - 16u) * tChannels;
// variant using plain 16 byte loads
3043 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3045 const uint8x16_t left_u_8x16 = vld1q_u8(left);
3046 const uint8x16_t right_u_8x16 = vld1q_u8(right);
// reversal of all 16 bytes: reverse within each 64 bit half, then swap both halves
3048 uint8x16_t revLeft_u_8x16 = vrev64q_u8(left_u_8x16);
3049 revLeft_u_8x16 = vcombine_u8(vget_high_u8(revLeft_u_8x16), vget_low_u8(revLeft_u_8x16));
3051 uint8x16_t revRight_u_8x16 = vrev64q_u8(right_u_8x16);
3052 revRight_u_8x16 = vcombine_u8(vget_high_u8(revRight_u_8x16), vget_low_u8(revRight_u_8x16));
// both blocks have been loaded before the first store, the reversed blocks are written to the opposite side
3054 vst1q_u8(left, revRight_u_8x16);
3055 vst1q_u8(right, revLeft_u_8x16);
3057 left += 16u * tChannels;
3058 right -= 16u * tChannels;
// the first (blocks32 * 16) pixels and their counterparts on the right side are done
3061 n += blocks32 * 16u;
// variant using the de-interleaving load vld2q_u8, each of the two planes is reversed individually
3068 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3070 const uint8x16x2_t left_u_8x16x2 = vld2q_u8(left);
3071 const uint8x16x2_t right_u_8x16x2 = vld2q_u8(right);
3073 uint8x16x2_t revLeft_u_8x16x2;
3074 revLeft_u_8x16x2.val[0] = vrev64q_u8(left_u_8x16x2.val[0]);
3075 revLeft_u_8x16x2.val[1] = vrev64q_u8(left_u_8x16x2.val[1]);
3076 revLeft_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[0]), vget_low_u8(revLeft_u_8x16x2.val[0]));
3077 revLeft_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[1]), vget_low_u8(revLeft_u_8x16x2.val[1]));
3079 uint8x16x2_t revRight_u_8x16x2;
3080 revRight_u_8x16x2.val[0] = vrev64q_u8(right_u_8x16x2.val[0]);
3081 revRight_u_8x16x2.val[1] = vrev64q_u8(right_u_8x16x2.val[1]);
3082 revRight_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[0]), vget_low_u8(revRight_u_8x16x2.val[0]));
3083 revRight_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[1]), vget_low_u8(revRight_u_8x16x2.val[1]));
3085 vst2q_u8(left, revRight_u_8x16x2);
3086 vst2q_u8(right, revLeft_u_8x16x2);
3088 left += 16u * tChannels;
3089 right -= 16u * tChannels;
3092 n += blocks32 * 16u;
// variant using the de-interleaving load vld3q_u8, each of the three planes is reversed individually
3099 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3101 const uint8x16x3_t left_u_8x16x3 = vld3q_u8(left);
3102 const uint8x16x3_t right_u_8x16x3 = vld3q_u8(right);
3104 uint8x16x3_t revLeft_u_8x16x3;
3105 revLeft_u_8x16x3.val[0] = vrev64q_u8(left_u_8x16x3.val[0]);
3106 revLeft_u_8x16x3.val[1] = vrev64q_u8(left_u_8x16x3.val[1]);
3107 revLeft_u_8x16x3.val[2] = vrev64q_u8(left_u_8x16x3.val[2]);
3108 revLeft_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[0]), vget_low_u8(revLeft_u_8x16x3.val[0]));
3109 revLeft_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[1]), vget_low_u8(revLeft_u_8x16x3.val[1]));
3110 revLeft_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[2]), vget_low_u8(revLeft_u_8x16x3.val[2]));
3112 uint8x16x3_t revRight_u_8x16x3;
3113 revRight_u_8x16x3.val[0] = vrev64q_u8(right_u_8x16x3.val[0]);
3114 revRight_u_8x16x3.val[1] = vrev64q_u8(right_u_8x16x3.val[1]);
3115 revRight_u_8x16x3.val[2] = vrev64q_u8(right_u_8x16x3.val[2]);
3116 revRight_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[0]), vget_low_u8(revRight_u_8x16x3.val[0]));
3117 revRight_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[1]), vget_low_u8(revRight_u_8x16x3.val[1]));
3118 revRight_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[2]), vget_low_u8(revRight_u_8x16x3.val[2]));
3120 vst3q_u8(left, revRight_u_8x16x3);
3121 vst3q_u8(right, revLeft_u_8x16x3);
3123 left += 16u * tChannels;
3124 right -= 16u * tChannels;
3127 n += blocks32 * 16u;
// variant using the de-interleaving load vld4q_u8, each of the four planes is reversed individually
3134 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3136 const uint8x16x4_t left_u_8x16x4 = vld4q_u8(left);
3137 const uint8x16x4_t right_u_8x16x4 = vld4q_u8(right);
3139 uint8x16x4_t revLeft_u_8x16x4;
3140 revLeft_u_8x16x4.val[0] = vrev64q_u8(left_u_8x16x4.val[0]);
3141 revLeft_u_8x16x4.val[1] = vrev64q_u8(left_u_8x16x4.val[1]);
3142 revLeft_u_8x16x4.val[2] = vrev64q_u8(left_u_8x16x4.val[2]);
3143 revLeft_u_8x16x4.val[3] = vrev64q_u8(left_u_8x16x4.val[3]);
3144 revLeft_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[0]), vget_low_u8(revLeft_u_8x16x4.val[0]));
3145 revLeft_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[1]), vget_low_u8(revLeft_u_8x16x4.val[1]));
3146 revLeft_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[2]), vget_low_u8(revLeft_u_8x16x4.val[2]));
3147 revLeft_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[3]), vget_low_u8(revLeft_u_8x16x4.val[3]));
3149 uint8x16x4_t revRight_u_8x16x4;
3150 revRight_u_8x16x4.val[0] = vrev64q_u8(right_u_8x16x4.val[0]);
3151 revRight_u_8x16x4.val[1] = vrev64q_u8(right_u_8x16x4.val[1]);
3152 revRight_u_8x16x4.val[2] = vrev64q_u8(right_u_8x16x4.val[2]);
3153 revRight_u_8x16x4.val[3] = vrev64q_u8(right_u_8x16x4.val[3]);
3154 revRight_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[0]), vget_low_u8(revRight_u_8x16x4.val[0]));
3155 revRight_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[1]), vget_low_u8(revRight_u_8x16x4.val[1]));
3156 revRight_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[2]), vget_low_u8(revRight_u_8x16x4.val[2]));
3157 revRight_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[3]), vget_low_u8(revRight_u_8x16x4.val[3]));
3159 vst4q_u8(left, revRight_u_8x16x4);
3160 vst4q_u8(right, revLeft_u_8x16x4);
3162 left += 16u * tChannels;
3163 right -= 16u * tChannels;
3166 n += blocks32 * 16u;
// scalar part: swaps the remaining pixels pairwise, pixel n with pixel (size - n - 1), until the center of the row is reached
3179 PixelType intermediate;
3181 PixelType*
const pixels = (PixelType*)(data);
3183 while (n < size / 2)
3185 intermediate = pixels[n];
3187 pixels[n] = pixels[size - n - 1];
3188 pixels[size - n - 1] = intermediate;
// NOTE(review): the line holding this function's name and parameter list is not part of this listing, presumably this is reverseRowChannelOrder() - confirm.
// Reverses the order of the channels of each pixel of one row, the order of the pixels stays unchanged;
// source and target must be different buffers. 'size' is the number of pixels in the row.
// NOTE(review): the conditions selecting the individual SIMD loops and the bodies of the SSE loops are not part of this listing.
3194 template <
typename T,
unsigned int tChannels>
3197 ocean_assert(source !=
nullptr && target !=
nullptr);
3198 ocean_assert(source != target);
3199 ocean_assert(size >= 1);
// memory ranges of both rows, used by the assertions below only
3202 const T*
const debugSourceStart = source;
3203 const T*
const debugSourceEnd = debugSourceStart + size * tChannels;
3205 const T*
const debugTargetStart = target;
3206 const T*
const debugTargetEnd = debugTargetStart + size * tChannels;
// with one channel there is nothing to reverse, the row is copied as it is
3209 if constexpr (tChannels == 1)
3213 memcpy(target, source,
sizeof(T) * size);
3217 const T*
const sourceEnd = source + size * tChannels;
3219 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// each SIMD iteration handles 16 pixels, the remaining pixels are handled by the scalar loop at the end
3223 const size_t blocks16 = size / size_t(16);
3228 ocean_assert(
false &&
"This should have been handled above!");
// NOTE(review): only the pointer increments of the three SSE loops are visible in this listing
3233 for (
size_t n = 0; n < blocks16; ++n)
3237 source += 16u * tChannels;
3238 target += 16u * tChannels;
3246 for (
size_t n = 0; n < blocks16; ++n)
3250 source += 16u * tChannels;
3251 target += 16u * tChannels;
3259 for (
size_t n = 0; n < blocks16; ++n)
3263 source += 16u * tChannels;
3264 target += 16u * tChannels;
3275 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// each SIMD iteration handles 16 pixels, the remaining pixels are handled by the scalar loop at the end
3279 const size_t blocks16 = size / size_t(16);
3284 ocean_assert(
false &&
"This should have been handled above!");
// variant handling 32 bytes per iteration
3289 for (
size_t n = 0; n < blocks16; ++n)
3291 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3292 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3294 const uint8x16_t sourceA_u_8x16 = vld1q_u8((
const uint8_t*)source + 0);
3295 const uint8x16_t sourceB_u_8x16 = vld1q_u8((
const uint8_t*)source + 16);
// vrev16q_u8 swaps the two bytes of each 16 bit unit
3297 const uint8x16_t revSourceA_u_8x16 = vrev16q_u8(sourceA_u_8x16);
3298 const uint8x16_t revSourceB_u_8x16 = vrev16q_u8(sourceB_u_8x16);
3300 vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3301 vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3303 source += 16u * tChannels;
3304 target += 16u * tChannels;
// variant using the de-interleaving load vld3q_u8
3312 for (
size_t n = 0; n < blocks16; ++n)
3314 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3315 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3317 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
// the first and the last plane are exchanged, the middle plane stays where it is
3319 uint8x16x3_t revSource_u_8x16x3;
3320 revSource_u_8x16x3.val[0] = source_u_8x16x3.val[2];
3321 revSource_u_8x16x3.val[1] = source_u_8x16x3.val[1];
3322 revSource_u_8x16x3.val[2] = source_u_8x16x3.val[0];
3324 vst3q_u8((uint8_t*)target, revSource_u_8x16x3);
3326 source += 16u * tChannels;
3327 target += 16u * tChannels;
// variant handling 64 bytes per iteration
3335 for (
size_t n = 0; n < blocks16; ++n)
3337 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3338 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3340 const uint8x16_t sourceA_u_8x16 = vld1q_u8((
const uint8_t*)source + 0);
3341 const uint8x16_t sourceB_u_8x16 = vld1q_u8((
const uint8_t*)source + 16);
3342 const uint8x16_t sourceC_u_8x16 = vld1q_u8((
const uint8_t*)source + 32);
3343 const uint8x16_t sourceD_u_8x16 = vld1q_u8((
const uint8_t*)source + 48);
// vrev32q_u8 reverses the four bytes of each 32 bit unit
3345 const uint8x16_t revSourceA_u_8x16 = vrev32q_u8(sourceA_u_8x16);
3346 const uint8x16_t revSourceB_u_8x16 = vrev32q_u8(sourceB_u_8x16);
3347 const uint8x16_t revSourceC_u_8x16 = vrev32q_u8(sourceC_u_8x16);
3348 const uint8x16_t revSourceD_u_8x16 = vrev32q_u8(sourceD_u_8x16);
3350 vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3351 vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3352 vst1q_u8((uint8_t*)target + 32, revSourceC_u_8x16);
3353 vst1q_u8((uint8_t*)target + 48, revSourceD_u_8x16);
3355 source += 16u * tChannels;
3356 target += 16u * tChannels;
// scalar loop for all pixels not handled above
3369 while (source != sourceEnd)
3371 ocean_assert(source < sourceEnd);
3373 ocean_assert(source >= debugSourceStart && source + tChannels <= debugSourceEnd);
3374 ocean_assert(target >= debugTargetStart && target + tChannels <= debugTargetEnd);
3376 for (
unsigned int n = 0u; n < tChannels; ++n)
// target channel n receives the source channel counted from the end of the pixel
3378 target[n] = source[tChannels - n - 1u];
3381 source += tChannels;
3382 target += tChannels;
// NOTE(review): the line holding this function's name and parameter list is not part of this listing, presumably this is shuffleRowChannels() - confirm.
// Shuffles the channels of one row: target channel n of each pixel receives the source channel which is stored in the
// n-th hexadecimal digit (lowest digit first) of tShufflePattern. 'size' is the number of pixels in the row.
// NOTE(review): braces, 'break' statements, default cases and the body of the SSE loop are not part of this listing.
3386 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
3389 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
3390 static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u,
"Invalid channel number!");
// the combination of one source channel and one target channel is rejected
3392 static_assert(tSourceChannels != 1u || tTargetChannels != 1u,
"Invalid channel number!");
// each of the eight digits of the pattern must be a valid source channel index, this includes the digits of unused target channels
3394 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
3395 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
3396 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
3397 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
3398 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
3399 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
3400 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
3401 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
3403 ocean_assert(source !=
nullptr && target !=
nullptr);
3404 ocean_assert(size != 0);
3406 const T*
const sourceEnd = source + size * tSourceChannels;
3408 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// each SIMD iteration handles 16 pixels, the remaining pixels are handled by the scalar loop at the end
3412 const size_t blocks16 = size / size_t(16);
// the switch value holds the source channels in the low digit and the target channels in the next digit
3414 switch (tSourceChannels | ((tTargetChannels) << 4u))
// 4 source channels -> 4 target channels
3417 case (4u | (4u << 4u)):
// byte offsets of the second, third and fourth pixel within a block of four pixels with four bytes each
3421 constexpr
unsigned int offset1 = 0x04040404u;
3422 constexpr
unsigned int offset2 = 0x08080808u;
3423 constexpr
unsigned int offset3 = 0x0C0C0C0Cu;
// spreads the four lowest digits of the pattern so that each digit sits in an own byte, this is the byte pattern for the first pixel
3426 const unsigned int shufflePattern0 = ((tShufflePattern & 0xF000u) << 12u) | ((tShufflePattern & 0x0F00u) << 8u) | ((tShufflePattern & 0x00F0u) << 4u) | ((tShufflePattern & 0x000Fu) << 0u);
// byte patterns of the following three pixels
3428 const unsigned int shufflePattern1 = shufflePattern0 + offset1;
3429 const unsigned int shufflePattern2 = shufflePattern0 + offset2;
3430 const unsigned int shufflePattern3 = shufflePattern0 + offset3;
// the four 32 bit patterns are packed into one 128 bit value, patterns 3 and 2 form the first 64 bit argument, patterns 1 and 0 the second
3432 const __m128i shufflePattern128 =
SSE::set128i((((
unsigned long long)shufflePattern3) << 32ull) | (
unsigned long long)shufflePattern2, (((
unsigned long long)shufflePattern1) << 32ull) | (
unsigned long long)shufflePattern0);
// NOTE(review): the loop body using shufflePattern128 is not part of this listing
3434 for (
size_t n = 0; n < blocks16; ++n)
3441 source += 16u * tSourceChannels;
3442 target += 16u * tTargetChannels;
3454 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// each SIMD iteration handles 16 pixels, the remaining pixels are handled by the scalar loop at the end
3458 const size_t blocks16 = size / size_t(16);
// the switch value holds the source channels in the low digit and the target channels in the next digit
3460 switch (tSourceChannels | ((tTargetChannels) << 4u))
// 1 source channel -> 3 target channels: the single source plane is written to all target planes
3463 case (1u | (3u << 4u)):
3465 static_assert(tSourceChannels != 1u || tShufflePattern == 0u,
"Invalid shuffle patter!");
3467 for (
size_t n = 0; n < blocks16; ++n)
3469 const uint8x16_t source_u_8x16 = vld1q_u8((
const uint8_t*)source);
3471 uint8x16x3_t target_u_8x16x3;
3473 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3475 target_u_8x16x3.val[nT] = source_u_8x16;
3478 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3480 source += 16u * tSourceChannels;
3481 target += 16u * tTargetChannels;
// 2 source channels -> 1 target channel: one of the two de-interleaved planes is stored
3488 case (2u | (1u << 4u)):
3490 for (
size_t n = 0; n < blocks16; ++n)
3492 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((
const uint8_t*)source);
// the index is masked to [0, 1], the assertion ensures that the mask did not change the digit
3494 constexpr
unsigned int sourceChannel = tShufflePattern & 0x00000001u;
3495 static_assert(sourceChannel <= 1u,
"Invalid shuffle pattern!");
3496 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3498 const uint8x16_t target_u_8x16 = source_u_8x16x2.val[sourceChannel];
3500 vst1q_u8((uint8_t*)target, target_u_8x16);
3502 source += 16u * tSourceChannels;
3503 target += 16u * tTargetChannels;
// 2 source channels -> 3 target channels
3510 case (2u | (3u << 4u)):
3512 for (
size_t n = 0; n < blocks16; ++n)
3514 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((
const uint8_t*)source);
3516 uint8x16x3_t target_u_8x16x3;
3518 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3520 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
// target plane nT is a copy of the source plane selected by digit nT of the pattern
3522 target_u_8x16x3.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u];
3525 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3527 source += 16u * tSourceChannels;
3528 target += 16u * tTargetChannels;
// 2 source channels -> 4 target channels
3535 case (2u | (4u << 4u)):
3537 for (
size_t n = 0; n < blocks16; ++n)
3539 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((
const uint8_t*)source);
3541 uint8x16x4_t target_u_8x16x4;
3543 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3545 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3547 target_u_8x16x4.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u];
3550 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3552 source += 16u * tSourceChannels;
3553 target += 16u * tTargetChannels;
// 3 source channels -> 1 target channel
3560 case (3u | (1u << 4u)):
// the index is limited to 2, presumably to keep the plane access valid in instantiations which never execute this case - TODO confirm
3562 constexpr
unsigned int sourceChannel = (tShufflePattern & 0x0000000Fu) <= 2u ? (tShufflePattern & 0x0000000Fu) : 2u;
3563 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3565 for (
size_t n = 0; n < blocks16; ++n)
3567 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3569 const uint8x16_t target_u_8x16 = source_u_8x16x3.val[sourceChannel];
3571 vst1q_u8((uint8_t*)target, target_u_8x16);
3573 source += 16u * tSourceChannels;
3574 target += 16u * tTargetChannels;
// 3 source channels -> 2 target channels
3581 case (3u | (2u << 4u)):
3583 for (
size_t n = 0; n < blocks16; ++n)
3585 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3587 uint8x16x2_t target_u_8x16x2;
3589 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
// the plane index is limited to 2 via std::min
3591 target_u_8x16x2.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)];
3594 vst2q_u8((uint8_t*)target, target_u_8x16x2);
3596 source += 16u * tSourceChannels;
3597 target += 16u * tTargetChannels;
// 3 source channels -> 3 target channels
3604 case (3u | (3u << 4u)):
3606 for (
size_t n = 0; n < blocks16; ++n)
3608 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3610 uint8x16x3_t target_u_8x16x3;
3612 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3614 target_u_8x16x3.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)];
3617 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3619 source += 16u * tSourceChannels;
3620 target += 16u * tTargetChannels;
// 4 source channels -> 1 target channel
3627 case (4u | (1u << 4u)):
3629 for (
size_t n = 0; n < blocks16; ++n)
3631 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
// the index is masked to [0, 3], the assertion ensures that the mask did not change the digit
3633 constexpr
unsigned int sourceChannel = tShufflePattern & 0x00000003u;
3634 static_assert(sourceChannel <= 3u,
"Invalid shuffle pattern!");
3636 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3638 const uint8x16_t target_u_8x16 = source_u_8x16x4.val[sourceChannel];
3640 vst1q_u8((uint8_t*)target, target_u_8x16);
3642 source += 16u * tSourceChannels;
3643 target += 16u * tTargetChannels;
// 4 source channels -> 2 target channels
3650 case (4u | (2u << 4u)):
3652 for (
size_t n = 0; n < blocks16; ++n)
3654 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3656 uint8x16x2_t target_u_8x16x2;
3658 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3660 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3662 target_u_8x16x2.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u];
3665 vst2q_u8((uint8_t*)target, target_u_8x16x2);
3667 source += 16u * tSourceChannels;
3668 target += 16u * tTargetChannels;
// 4 source channels -> 3 target channels
3675 case (4u | (3u << 4u)):
3677 for (
size_t n = 0; n < blocks16; ++n)
3679 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3681 uint8x16x3_t target_u_8x16x3;
3683 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3685 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3687 target_u_8x16x3.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u];
3690 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3692 source += 16u * tSourceChannels;
3693 target += 16u * tTargetChannels;
// 4 source channels -> 4 target channels
3700 case (4u | (4u << 4u)):
3702 for (
size_t n = 0; n < blocks16; ++n)
3704 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3706 uint8x16x4_t target_u_8x16x4;
3708 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3710 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3712 target_u_8x16x4.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u];
3715 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3717 source += 16u * tSourceChannels;
3718 target += 16u * tTargetChannels;
// scalar loop for all pixels not handled above
3732 while (source != sourceEnd)
3734 ocean_assert(source < sourceEnd);
3736 for (
unsigned int n = 0u; n < tTargetChannels; ++n)
// digit n of the pattern is the index of the source channel for target channel n
3738 target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
3741 source += tSourceChannels;
3742 target += tTargetChannels;
// NOTE(review): the line holding this function's name and parameter list is not part of this listing,
// presumably this is shuffleRowChannelsAndSetLastChannelValue() - confirm.
// Shuffles the channels of one row like the plain shuffle function, but only for the first (tTargetChannels - 1) target channels:
// target channel n receives the source channel stored in the n-th hexadecimal digit (lowest digit first) of tShufflePattern.
// The last target channel of each pixel is set to a constant value which is read from 'options'.
// NOTE(review): braces, 'break' statements and default cases are not part of this listing.
3746 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
3749 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
// at least two target channels are required: one shuffled channel and the channel with the constant value
3750 static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u,
"Invalid channel number!");
// each of the eight digits of the pattern must be a valid source channel index, this includes the digits of unused target channels
3752 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
3753 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
3754 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
3755 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
3756 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
3757 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
3758 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
3759 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
3761 ocean_assert(source !=
nullptr && target !=
nullptr);
3762 ocean_assert(size != 0);
3764 ocean_assert(options !=
nullptr);
// 'options' is interpreted as pointer to one element of type T holding the value of the last target channel
3766 const T lastChannelValue = *(
const T*)(options);
3768 const T*
const sourceEnd = source + size * tSourceChannels;
3770 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// each SIMD iteration handles 16 pixels, the remaining pixels are handled by the scalar loop at the end
3774 const size_t blocks16 = size / size_t(16);
// the switch value holds the source channels in the low digit and the target channels in the next digit
3776 switch (tSourceChannels | ((tTargetChannels) << 4u))
// 1 source channel -> 4 target channels: three copies of the source plane followed by the constant plane
3779 case (1u | (4u << 4u)):
3781 ocean_assert(tShufflePattern == 0u);
// the constant plane is created once before the loop, the loop overwrites the first three planes only
3783 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3785 uint8x16x4_t target_u_8x16x4;
3786 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3788 for (
size_t n = 0; n < blocks16; ++n)
3790 const uint8x16_t source_u_8x16 = vld1q_u8((
const uint8_t*)source);
3792 for (
unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3794 target_u_8x16x4.val[nT] = source_u_8x16;
3797 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3799 source += 16u * tSourceChannels;
3800 target += 16u * tTargetChannels;
// 3 source channels -> 4 target channels: three shuffled planes followed by the constant plane
3807 case (3u | (4u << 4u)):
3809 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3811 uint8x16x4_t target_u_8x16x4;
3812 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3814 for (
size_t n = 0; n < blocks16; ++n)
3816 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3818 for (
unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
// the plane index is limited to 2 via std::min
3820 target_u_8x16x4.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)];
3823 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3825 source += 16u * tSourceChannels;
3826 target += 16u * tTargetChannels;
// 4 source channels -> 4 target channels: three shuffled planes followed by the constant plane
3833 case (4u | (4u << 4u)):
3835 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3837 uint8x16x4_t target_u_8x16x4;
3838 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3840 for (
size_t n = 0; n < blocks16; ++n)
3842 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3844 for (
unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
// the plane index is limited to 3 via std::min
3846 target_u_8x16x4.val[nT] = source_u_8x16x4.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 3u)];
3849 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3851 source += 16u * tSourceChannels;
3852 target += 16u * tTargetChannels;
// scalar loop for all pixels not handled above
3866 while (source != sourceEnd)
3868 ocean_assert(source < sourceEnd);
3870 for (
unsigned int n = 0u; n < tTargetChannels - 1u; ++n)
// digit n of the pattern is the index of the source channel for target channel n
3872 target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
// NOTE(review): the braces are not visible, so it cannot be told from here whether this assignment is inside or behind the loop above;
// the stored result is identical in both cases as the loop runs at least once (tTargetChannels >= 2)
3873 target[tTargetChannels - 1u] = lastChannelValue;
3876 source += tSourceChannels;
3877 target += tTargetChannels;
// Frame-level wrapper which re-orders (shuffles) the channels of every pixel of a frame.
// NOTE(review): the signature line of this definition is not part of this listing; the body reads
// `source`, `target`, `width`, `height`, `conversionFlag`, `sourcePaddingElements`, `targetPaddingElements` and `worker`.
3881 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
// Both channel counts must lie in the range [1, 8].
3884 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
3885 static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u,
"Invalid channel number!");
// The combination of one source channel and one target channel is rejected at compile time.
3887 static_assert(tSourceChannels != 1u || tTargetChannels != 1u,
"Invalid channel number!");
// The shuffle pattern is checked nibble by nibble (eight 4-bit fields): every field must be smaller than tSourceChannels.
3889 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
3890 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
3891 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
3892 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
3893 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
3894 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
3895 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
3896 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
3898 ocean_assert(source !=
nullptr && target !=
nullptr);
3899 ocean_assert(width >= 1u && height >= 1u);
// Row strides in elements: the payload of one row plus the optional padding elements.
3901 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
3902 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
// Both frames count as continuous only if neither of them has padding elements.
3904 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// The actual work is delegated: shuffleRowChannels is handed over as row conversion function,
// reverseRowPixelOrderInPlace as row reversal function; no options are passed (nullptr).
3906 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous,
nullptr, worker);
// Frame-level wrapper which shuffles the channels of every pixel and writes a constant value to the last target channel.
// NOTE(review): the signature line of this definition is not part of this listing; the body reads
// `source`, `target`, `width`, `height`, `newChannelValue`, `conversionFlag`, `sourcePaddingElements`, `targetPaddingElements` and `worker`.
3909 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
// The target needs at least two channels, the source at least one; at most eight channels each.
3912 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
3913 static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u,
"Invalid channel number!");
// The shuffle pattern is checked nibble by nibble (eight 4-bit fields): every field must be smaller than tSourceChannels.
3915 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
3916 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
3917 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
3918 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
3919 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
3920 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
3921 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
3922 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
3924 ocean_assert(source !=
nullptr && target !=
nullptr);
3925 ocean_assert(width >= 1u && height >= 1u);
// Row strides in elements: the payload of one row plus the optional padding elements.
3927 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
3928 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
// Both frames count as continuous only if neither of them has padding elements.
3930 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// Local copy of the new channel value; its address is forwarded as the `options` pointer,
// which the row function reads back as a `const T*`.
3932 const T options = newChannelValue;
// Delegates to the generic converter with shuffleRowChannelsAndSetLastChannelValue as row conversion function.
3934 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannelsAndSetLastChannelValue<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous, &options, worker);
// Frame-level wrapper which narrows a frame with 16 bit per channel to a frame with 8 bit per channel.
// NOTE(review): the signature line of this definition is not part of this listing; the body reads
// `source`, `target`, `width`, `height`, `conversionFlag`, `sourcePaddingElements`, `targetPaddingElements` and `worker`.
3937 template <
unsigned int tChannels>
3940 static_assert(tChannels >= 1u,
"Invalid channel number!");
3942 ocean_assert(source !=
nullptr && target !=
nullptr);
3943 ocean_assert(width >= 1u && height >= 1u);
// Source and target have the same number of channels, so both strides share the same payload width.
3945 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
3946 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
// Both frames count as continuous only if neither of them has padding elements.
3948 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// Delegates to the generic converter with narrowRow16BitPerChannelTo8BitPerChannel as row conversion function;
// the row reversal function operates on uint8_t elements (the target type); no options are passed (nullptr).
3950 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::narrowRow16BitPerChannelTo8BitPerChannel<tChannels>, FrameChannels::reverseRowPixelOrderInPlace<uint8_t, tChannels>, areContinuous,
nullptr, worker);
// Dispatcher which applies a compile-time pixel function to all rows of a frame via applyPixelModifierSubset.
// NOTE(review): the signature line and the branch lines selecting between the two calls below are not part of this
// listing; presumably the worker path is taken only when `worker` is not null - confirm against the original file.
3953 template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
3956 static_assert(tChannels > 0u,
"Invalid channel number!");
3958 ocean_assert(source && target);
3959 ocean_assert(width != 0u && height != 0u);
// Worker path: the subset function is bound with two trailing 0u placeholders and the row range [0, height) is
// handed to the worker (presumably the worker substitutes firstRow/numberRows per subset - confirm against Worker).
3962 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyPixelModifierSubset<T, tChannels, tPixelFunction>, source, target, width, height, conversionFlag, 0u, 0u), 0u, height);
// Direct path: one call covering all rows (firstRow = 0, numberRows = height).
3964 applyPixelModifierSubset<T, tChannels, tPixelFunction>(source, target, width, height, conversionFlag, 0u, height);
// Dispatcher which applies a compile-time pixel function (with individual source and target types, channel counts
// and paddings) to all rows of a frame via applyAdvancedPixelModifierSubset.
// NOTE(review): the signature line and the branch lines selecting between the two calls below are not part of this
// listing; presumably the worker path is taken only when `worker` is not null - confirm against the original file.
3967 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
3970 static_assert(tSourceChannels > 0u,
"Invalid source channel number!");
3971 static_assert(tTargetChannels > 0u,
"Invalid target channel number!");
3973 ocean_assert(source && target);
3974 ocean_assert(width != 0u && height != 0u);
// Worker path: the subset function is bound with two trailing 0u placeholders and the row range [0, height) is
// handed to the worker (presumably the worker substitutes firstRow/numberRows per subset - confirm against Worker).
3978 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>, source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
// Direct path: one call covering all rows (firstRow = 0, numberRows = height).
3982 applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>(source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, height);
// Applies a compile-time operator taking one pixel from each of two source frames and writing one target pixel,
// for all rows of the frames, by forwarding to applyBivariateOperatorSubset.
// source0/source1/target: the two input frames and the output frame; width/height: frame dimensions in pixels;
// *PaddingElements: optional padding elements at the end of each row of the corresponding frame;
// conversionFlag: forwarded unchanged to the subset function; worker: optional worker object.
// NOTE(review): the branch lines selecting between the two calls in the body are not part of this listing;
// presumably the worker path is taken only when `worker` is not null - confirm against the original file.
3986 template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
3987 void FrameChannels::applyBivariateOperator(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
Worker* worker)
3989 static_assert(tSourceChannels > 0u,
"Invalid source channel number!");
3990 static_assert(tTargetChannels > 0u,
"Invalid target channel number!");
3992 ocean_assert(source0 && source1 && target);
3993 ocean_assert(width != 0u && height != 0u);
// Worker path: the subset function is bound with two trailing 0u placeholders and the row range [0, height) is
// handed to the worker (presumably the worker substitutes firstRow/numberRows per subset - confirm against Worker).
3997 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>, source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
// Direct path: one call covering all rows (firstRow = 0, numberRows = height).
4001 FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>(source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, height);
// Applies a row operator function (provided at runtime) to all rows of a frame by forwarding to applyRowOperatorSubset.
// source/target: input and output frame; width/height: frame dimensions in pixels;
// sourcePaddingElements/targetPaddingElements: optional padding elements at the end of each row;
// rowOperatorFunction: function pointer of type RowOperatorFunction which is forwarded to the subset function;
// worker: optional worker object.
// NOTE(review): the branch lines selecting between the two calls in the body are not part of this listing;
// presumably the worker path is taken only when `worker` is not null - confirm against the original file.
4005 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
4006 void FrameChannels::applyRowOperator(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction,
Worker* worker)
4008 static_assert(tSourceChannels > 0u,
"Invalid source channel number!");
4009 static_assert(tTargetChannels > 0u,
"Invalid target channel number!");
4011 ocean_assert(source !=
nullptr && target !=
nullptr);
4012 ocean_assert(width != 0u && height != 0u);
// The subset function expects strides (payload plus padding) rather than paddings.
4014 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
4015 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
// Worker path: the subset function is bound with two trailing 0u placeholders and the row range [0, height) is
// handed to the worker (presumably the worker substitutes firstRow/numberRows per subset - confirm against Worker).
4019 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>, source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, 0u), 0u, height);
// Direct path: one call covering all rows (firstRow = 0, numberRows = height).
4023 applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>(source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, height);
// Dispatcher for a generic frame transformation which forwards to transformGenericSubset on byte pointers.
// NOTE(review): the signature line and the definition of `rowReversePixelOrderFunction` are not part of this
// listing; the `else` between the worker call and the direct call is not visible either.
4027 template <
typename T,
unsigned int tChannels>
4030 ocean_assert(source !=
nullptr && target !=
nullptr);
4031 ocean_assert(width >= 1u && height >= 1u);
// Size of the payload of one row in bytes (without padding).
4033 const unsigned int bytesPerRow = width *
sizeof(T) * tChannels;
// Strides in bytes: payload plus the padding elements scaled by the element size.
4035 const unsigned int sourceStrideBytes = width *
sizeof(T) * tChannels +
sizeof(T) * sourcePaddingElements;
4036 const unsigned int targetStrideBytes = width *
sizeof(T) * tChannels +
sizeof(T) * targetPaddingElements;
// The worker is used only if one is provided and the frame has more than 200 rows.
4042 if (worker && height > 200u)
// 9u and 10u match the zero-based positions of the two trailing 0u placeholders in the bound argument list;
// presumably 20u is a minimal number of rows per subset - confirm against Worker::executeFunction.
4044 worker->
executeFunction(
Worker::Function::createStatic(&
FrameChannels::transformGenericSubset, (
const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, 0u), 0u, height, 9u, 10u, 20u);
// Direct path: one call covering all rows (firstRow = 0, numberRows = height).
4048 transformGenericSubset((
const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, height);
// In-place dispatcher (single `frame` buffer) which forwards to premultipliedAlphaToStraightAlpha8BitPerChannelSubset.
// NOTE(review): the signature line and the `else` between the two calls are not part of this listing.
4052 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// At least two channels are required and the alpha channel index must address one of them.
4055 static_assert(tChannels >= 2u,
"Invalid channel number!");
4056 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4058 ocean_assert(frame !=
nullptr);
4059 ocean_assert(width >= 1u && height >= 1u);
// The worker is used only if one is provided and the frame has more than 200 rows.
4061 if (worker && height > 200u)
// 3u and 4u match the zero-based positions of the two trailing 0u placeholders in the bound argument list;
// presumably 20u is a minimal number of rows per subset - confirm against Worker::executeFunction.
4063 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
// Direct path: one call covering all rows (firstRow = 0, numberRows = height).
4067 premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
// Dispatcher with separate `source` and `target` buffers which forwards to
// premultipliedAlphaToStraightAlpha8BitPerChannelSubset.
// NOTE(review): the signature line and the `else` between the two calls are not part of this listing.
4071 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// At least two channels are required and the alpha channel index must address one of them.
4074 static_assert(tChannels >= 2u,
"Invalid channel number!");
4075 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4077 ocean_assert(source !=
nullptr && target !=
nullptr);
4078 ocean_assert(width >= 1u && height >= 1u);
// The worker is used only if one is provided and the frame has more than 200 rows.
4080 if (worker && height > 200u)
// 5u and 6u match the zero-based positions of the two trailing 0u placeholders in the bound argument list;
// presumably 20u is a minimal number of rows per subset - confirm against Worker::executeFunction.
4082 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
// Direct path: one call covering all rows (firstRow = 0, numberRows = height).
4086 premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
// In-place dispatcher (single `frame` buffer) which forwards to straightAlphaToPremultipliedAlpha8BitPerChannelSubset.
// NOTE(review): the signature line and the `else` between the two calls are not part of this listing.
4090 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// At least two channels are required and the alpha channel index must address one of them.
4093 static_assert(tChannels >= 2u,
"Invalid channel number!");
4094 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4096 ocean_assert(frame !=
nullptr);
4097 ocean_assert(width >= 1u && height >= 1u);
// The worker is used only if one is provided and the frame has more than 200 rows.
4099 if (worker && height > 200u)
// 3u and 4u match the zero-based positions of the two trailing 0u placeholders in the bound argument list;
// presumably 20u is a minimal number of rows per subset - confirm against Worker::executeFunction.
4101 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
// Direct path: one call covering all rows (firstRow = 0, numberRows = height).
4105 straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
// Dispatcher with separate `source` and `target` buffers which forwards to
// straightAlphaToPremultipliedAlpha8BitPerChannelSubset.
// NOTE(review): the signature line and the `else` between the two calls are not part of this listing.
4109 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// At least two channels are required and the alpha channel index must address one of them.
4112 static_assert(tChannels >= 2u,
"Invalid channel number!");
4113 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4115 ocean_assert(source !=
nullptr && target !=
nullptr);
4116 ocean_assert(width >= 1u && height >= 1u);
// The worker is used only if one is provided and the frame has more than 200 rows.
4118 if (worker && height > 200u)
// 5u and 6u match the zero-based positions of the two trailing 0u placeholders in the bound argument list;
// presumably 20u is a minimal number of rows per subset - confirm against Worker::executeFunction.
4120 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
// Direct path: one call covering all rows (firstRow = 0, numberRows = height).
4124 straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
// Row function which converts `size` pixels with uint16_t channels to pixels with uint8_t channels.
// NOTE(review): the signature line is not part of this listing; the body reads `source` (uint16_t),
// `target` (uint8_t) and `size` (number of pixels).
4128 template <
unsigned int tChannels>
4131 static_assert(tChannels >= 1u,
"Invalid channel number!");
4133 ocean_assert(source !=
nullptr && target !=
nullptr);
4134 ocean_assert(size > 0);
// End of the source row in elements; used as termination condition of the scalar loop below.
4136 const uint16_t*
const sourceEnd = source + size * tChannels;
4138 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4140 const size_t blocks8 = size / size_t(8);
// Each iteration loads 4 x 8 = 32 source elements and stores 32 target bytes, while the pointers are advanced
// by 8 * tChannels elements: this is consistent for tChannels == 4 only.
// NOTE(review): presumably a guard restricting this loop to 4 channels sits on lines not visible in this listing - confirm.
4146 for (
size_t n = 0; n < blocks8; ++n)
4148 const uint16x8_t sourceA_u_16x8 = vld1q_u16(source + 0);
4149 const uint16x8_t sourceB_u_16x8 = vld1q_u16(source + 8);
4150 const uint16x8_t sourceC_u_16x8 = vld1q_u16(source + 16);
4151 const uint16x8_t sourceD_u_16x8 = vld1q_u16(source + 24);
// vqrshrn_n_u16(x, 8) is a rounding, saturating narrowing right shift.
// NOTE(review): the scalar loop below truncates instead (source[n] >> 8u), so both paths can differ by one
// for values whose low byte is >= 128 - confirm which behavior is intended.
4153 const uint8x16_t targetAB_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceA_u_16x8, 8), vqrshrn_n_u16(sourceB_u_16x8, 8));
4154 const uint8x16_t targetCD_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceC_u_16x8, 8), vqrshrn_n_u16(sourceD_u_16x8, 8));
4156 vst1q_u8(target + 0, targetAB_u_8x16);
4157 vst1q_u8(target + 16, targetCD_u_8x16);
4159 source += 8u * tChannels;
4160 target += 8u * tChannels;
// Scalar loop: handles all pixels which have not been handled above, one pixel per iteration.
4172 while (source != sourceEnd)
4174 ocean_assert(source < sourceEnd);
4176 for (
unsigned int n = 0u; n < tChannels; ++n)
// The upper 8 bits of each 16 bit value are kept, the lower 8 bits are dropped.
4178 ocean_assert((uint16_t)(source[n] >> 8u) <= 255u);
4179 target[n] = (uint8_t)(source[n] >> 8u);
4182 source += tChannels;
4183 target += tChannels;
// Row function which combines one row of a frame with tSourceChannels channels and one row of a 1-channel frame
// into a target row with tSourceChannels + 1 channels; the extra channel becomes the first channel if tAddToFront
// is true, otherwise the last channel.
// NOTE(review): the signature line and the definitions of `flipTarget` and `mirrorTarget` are not part of this
// listing; the body reads `sources`, `targets`, `multipleRowIndex`, `width`, `height` and `options`.
4187 template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
4190 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
4191 static_assert(
sizeof(
size_t) ==
sizeof(
const T*),
"Invalid pointer size!");
4193 ocean_assert(sources !=
nullptr && targets !=
nullptr);
4194 ocean_assert(width != 0u && height != 0u);
4195 ocean_assert(multipleRowIndex < height);
4196 ocean_assert(options !=
nullptr);
// sources[0] is the multi-channel frame, sources[1] the frame providing the additional channel.
4198 const T* source = (
const T*)(sources[0]);
4199 const T* sourceOneChannel = (
const T*)(sources[1]);
4200 ocean_assert(source !=
nullptr && sourceOneChannel !=
nullptr);
4202 T* target = (T*)(targets[0]);
4203 ocean_assert(target !=
nullptr);
// The options are interpreted as three unsigned integers holding the padding elements of
// the multi-channel source, the 1-channel source, and the target (in this order).
4205 const unsigned int* uintOptions = (
const unsigned int*)options;
4206 ocean_assert(uintOptions !=
nullptr);
4208 const unsigned int sourcePaddingElements = uintOptions[0];
4209 const unsigned int sourceOneChannelPaddingElements = uintOptions[1];
4210 const unsigned int targetPaddingElements = uintOptions[2];
4212 const unsigned int targetChannels = tSourceChannels + 1u;
// Row strides in elements: payload plus padding.
4214 const unsigned int sourceStrideElements = tSourceChannels * width + sourcePaddingElements;
4215 const unsigned int sourceOneChannelStrideElements = width + sourceOneChannelPaddingElements;
4216 const unsigned int targetStrideElements = targetChannels * width + targetPaddingElements;
// Both source rows are addressed with the row index; when flipping, the target row is counted from the bottom.
4221 const T* sourceRow = source + sourceStrideElements * multipleRowIndex;
4222 const T* sourceOneChannelRow = sourceOneChannel + sourceOneChannelStrideElements * multipleRowIndex;
4223 T* targetRow = flipTarget ? target + targetStrideElements * (height - multipleRowIndex - 1u) : target + targetStrideElements * multipleRowIndex;
// Not mirrored: target pixels are written from left to right.
4225 if (mirrorTarget ==
false)
4227 for (
unsigned int n = 0u; n < width; ++n)
4229 if constexpr (tAddToFront)
// The additional channel goes first, followed by the copied source channels.
4231 targetRow[0] = sourceOneChannelRow[0];
4233 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4235 targetRow[c + 1u] = sourceRow[c];
// Otherwise the source channels are copied first and the additional channel is appended.
4240 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4242 targetRow[c] = sourceRow[c];
4245 targetRow[tSourceChannels] = sourceOneChannelRow[0];
4248 sourceRow += tSourceChannels;
4249 sourceOneChannelRow++;
4251 targetRow += targetChannels;
// Mirrored: writing starts at the last target pixel of the row and moves towards the first pixel.
4256 targetRow += targetChannels * (width - 1u);
4258 for (
unsigned int n = 0u; n < width; ++n)
4260 if constexpr (tAddToFront)
4262 targetRow[0] = sourceOneChannelRow[0];
4264 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4266 targetRow[c + 1u] = sourceRow[c];
4271 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4273 targetRow[c] = sourceRow[c];
4276 targetRow[tSourceChannels] = sourceOneChannelRow[0];
4279 sourceRow += tSourceChannels;
4280 sourceOneChannelRow++;
4282 targetRow -= targetChannels;
// Row function which copies `size` pixels with tSourceChannels channels to pixels with tSourceChannels + 1 channels
// and fills the extra channel with a constant value; the extra channel is the first channel if tAddToFront is true,
// otherwise the last channel.
// NOTE(review): the signature line is not part of this listing; the body reads `source`, `target`, `size`
// and `channelValueParameter`.
4287 template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
4290 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
4292 ocean_assert(source !=
nullptr && target !=
nullptr);
4293 ocean_assert(size > 0);
4294 ocean_assert(channelValueParameter !=
nullptr);
// The parameter is interpreted as pointer to one value of type T.
4296 const T& channelValue = *((
const T*)channelValueParameter);
4298 const unsigned int targetChannels = tSourceChannels + 1u;
4300 for (
size_t n = 0; n < size; ++n)
4302 if constexpr (tAddToFront)
// The constant value goes first, followed by the copied source channels.
4304 target[0] = channelValue;
4306 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4308 target[c + 1u] = source[c];
// Otherwise the source channels are copied first and the constant value is appended.
4313 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4315 target[c] = source[c];
4318 target[tSourceChannels] = channelValue;
4321 source += tSourceChannels;
4322 target += targetChannels;
// Row function which copies one channel (tSourceChannelIndex) of each of `size` source pixels to one channel
// (tTargetChannelIndex) of the corresponding target pixel; all other target channels are left untouched.
// NOTE(review): the signature line is not part of this listing; the body reads `source`, `target` and `size`.
4326 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
4329 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
4330 static_assert(tTargetChannels != 0u,
"Invalid channel number!");
// Both channel indices must address an existing channel.
4332 static_assert(tSourceChannelIndex < tSourceChannels,
"Invalid channel number!");
4333 static_assert(tTargetChannelIndex < tTargetChannels,
"Invalid channel number!");
4335 ocean_assert(source !=
nullptr && target !=
nullptr);
4336 ocean_assert(size > 0);
4338 for (
size_t n = 0; n < size; ++n)
4340 target[tTargetChannelIndex] = source[tSourceChannelIndex];
// Advance both pointers by one pixel of the respective layout.
4342 source += tSourceChannels;
4343 target += tTargetChannels;
4347 template <
typename TSource,
typename TTarget>
4348 void FrameChannels::separateTo1ChannelRuntime(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
4350 ocean_assert(sourceFrame !=
nullptr);
4351 ocean_assert(targetFrames !=
nullptr);
4353 ocean_assert(width != 0u && height != 0u);
4354 ocean_assert(channels != 0u);
4357 for (
unsigned int c = 0u; c < channels; ++c)
4359 ocean_assert(targetFrames[c] !=
nullptr);
4363 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements ==
nullptr)
4365 for (
unsigned int n = 0u; n < width * height; ++n)
4367 for (
unsigned int c = 0u; c < channels; ++c)
4369 targetFrames[c][n] = TTarget(sourceFrame[n * channels + c]);
4373 else if (targetFramesPaddingElements ==
nullptr)
4375 ocean_assert(sourceFramePaddingElements != 0u);
4377 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4379 for (
unsigned int y = 0u; y < height; ++y)
4381 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4383 const unsigned int targetRowOffset = y * width;
4385 for (
unsigned int x = 0u; x < width; ++x)
4387 for (
unsigned int c = 0u; c < channels; ++c)
4389 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * channels + c));
4396 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4398 Indices32 targetFrameStrideElements(channels);
4400 for (
unsigned int c = 0u; c < channels; ++c)
4402 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
4405 for (
unsigned int y = 0u; y < height; ++y)
4407 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4409 for (
unsigned int x = 0u; x < width; ++x)
4411 for (
unsigned int c = 0u; c < channels; ++c)
4413 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * channels + c));
4420 template <
typename TSource,
typename TTarget>
4421 void FrameChannels::zipChannelsRuntime(
const TSource*
const* sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
4423 ocean_assert(sourceFrames !=
nullptr);
4424 ocean_assert(targetFrame !=
nullptr);
4426 ocean_assert(width != 0u && height != 0u);
4427 ocean_assert(channels != 0u);
4429 bool allSourceFramesContinuous =
true;
4431 if (sourceFramesPaddingElements !=
nullptr)
4433 for (
unsigned int n = 0u; n < channels; ++n)
4435 if (sourceFramesPaddingElements[n] != 0u)
4437 allSourceFramesContinuous =
false;
4443 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
4445 for (
unsigned int n = 0u; n < width * height; ++n)
4447 for (
unsigned int c = 0u; c < channels; ++c)
4449 targetFrame[n * channels + c] = TTarget(sourceFrames[c][n]);
4455 const unsigned int targetFrameStrideElements = width * channels + targetFramePaddingElements;
4457 Indices32 sourceFrameStrideElements(channels);
4459 for (
unsigned int c = 0u; c < channels; ++c)
4461 if (sourceFramesPaddingElements ==
nullptr)
4463 sourceFrameStrideElements[c] = width;
4467 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
4471 for (
unsigned int y = 0u; y < height; ++y)
4473 TTarget*
const targetRow = targetFrame + y * targetFrameStrideElements;
4475 for (
unsigned int x = 0u; x < width; ++x)
4477 for (
unsigned int c = 0u; c < channels; ++c)
4479 *(targetRow + x * channels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
4486 template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
4487 void FrameChannels::setChannelSubset(T* frame,
const unsigned int width,
const T value,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows)
4489 static_assert(tChannels >= 1u,
"Invalid channel number!");
4490 static_assert(tChannel < tChannels,
"Invalid channel index!");
4492 ocean_assert(frame !=
nullptr);
4494 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
4496 frame += firstRow * frameStrideElements + tChannel;
4498 for (
unsigned int n = 0u; n < numberRows; ++n)
4500 for (
unsigned int x = 0u; x < width; ++x)
4502 frame[x * tChannels] = value;
4505 frame += frameStrideElements;
// Applies a compile-time pixel function to a block of rows of a frame; source and target share the element type
// and the number of channels, and no padding elements are considered (rows are addressed with width * tChannels).
// NOTE(review): the signature line and the `case` labels of the switch are not part of this listing; the four
// branches below are described by what their pointer arithmetic does. The body reads `source`, `target`, `width`,
// `height`, `conversionFlag`, `firstRow` and `numberRows`.
4509 template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
4512 static_assert(tChannels >= 1u,
"Invalid channel number");
4514 ocean_assert(source && target);
4515 ocean_assert(source != target);
4517 ocean_assert(numberRows > 0u);
4518 ocean_assert(firstRow + numberRows <= height);
// Number of elements in one row, and in the entire block of rows to be handled.
4520 const unsigned int widthElements = width * tChannels;
4521 const unsigned int targetBlockSize = widthElements * numberRows;
4523 switch (conversionFlag)
// Branch 1: source and target are traversed in the same order, pixel by pixel across all rows of the block.
4527 source += firstRow * widthElements;
4528 target += firstRow * widthElements;
4530 const T*
const targetEnd = target + targetBlockSize;
4532 while (target != targetEnd)
4534 tPixelFunction(source, target);
4536 source += tChannels;
4537 target += tChannels;
// Branch 2: source row `firstRow` is written to target row `height - firstRow - 1`; the target moves upwards
// row by row while pixels within a row are written from left to right.
4545 source += firstRow * widthElements;
4546 target += width * height * tChannels - (firstRow + 1u) * widthElements;
4548 const T*
const targetEnd = target - targetBlockSize;
4550 while (target != targetEnd)
4552 const T*
const targetRowEnd = target + widthElements;
4554 while (target != targetRowEnd)
4556 tPixelFunction(source, target);
4558 source += tChannels;
4559 target += tChannels;
// After a row the target points to the end of that row; going back two rows reaches the start of the row above.
4562 target -= (widthElements << 1);
// Branch 3: rows keep their position, pixels within a row are written from right to left;
// the target pointer starts at the end of the row and is decremented before each write.
4570 source += firstRow * widthElements;
4571 target += (firstRow + 1u) * widthElements;
4573 const T*
const targetEnd = target + targetBlockSize;
4575 while (target != targetEnd)
4577 const T*
const targetRowEnd = target - widthElements;
4579 while (target != targetRowEnd)
4581 tPixelFunction(source, target -= tChannels);
4583 source += tChannels;
// After a row the target points to the start of that row; advancing two rows reaches the end of the next row.
4586 target += widthElements << 1;
// Branch 4: the target is traversed in exactly the reverse pixel order of the source,
// starting behind the pixel which mirrors the first source pixel of the block.
4594 source += firstRow * widthElements;
4595 target += width * height * tChannels - firstRow * widthElements;
4597 const T*
const targetEnd = target - targetBlockSize;
4599 while (target != targetEnd)
4601 tPixelFunction(source, target -= tChannels);
4603 source += tChannels;
// Applies a compile-time pixel function to a block of rows, with individual element types, channel counts and
// padding elements for source and target.
// source/target: input and output frame; width/height: frame dimensions in pixels;
// sourcePaddingElements/targetPaddingElements: padding elements at the end of each row;
// conversionFlag: selects one of the four traversal branches; firstRow/numberRows: the block of rows to be handled.
// NOTE(review): the `case` labels of the switch are not part of this listing; the four branches below are
// described by what their pointer arithmetic does.
4613 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
4614 void FrameChannels::applyAdvancedPixelModifierSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows)
4616 static_assert(tSourceChannels >= 1u,
"Invalid source channel number");
4617 static_assert(tTargetChannels >= 1u,
"Invalid target channel number");
4619 ocean_assert(source && target);
4620 ocean_assert((
void*)source != (
void*)target);
4622 ocean_assert(numberRows != 0u);
4623 ocean_assert(firstRow + numberRows <= height);
// Payload of one row in elements, and the resulting strides including padding.
4625 const unsigned int sourceWidthElements = width * tSourceChannels;
4626 const unsigned int targetWidthElements = width * tTargetChannels;
4628 const unsigned int sourceStrideElements = sourceWidthElements + sourcePaddingElements;
4629 const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4631 switch (conversionFlag)
// Branch 1: target row index equals source row index, pixels are written from left to right.
4635 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4637 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4638 TTarget* targetPixel = target + rowIndex * targetStrideElements;
4640 for (
unsigned int x = 0u; x < width; ++x)
4642 tPixelFunction(sourcePixel, targetPixel);
4644 sourcePixel += tSourceChannels;
4645 targetPixel += tTargetChannels;
// Branch 2: source row `rowIndex` is written to target row `height - rowIndex - 1`, pixels from left to right.
4654 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4656 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4657 TTarget* targetPixel = target + (height - rowIndex - 1u) * targetStrideElements;
4659 for (
unsigned int x = 0u; x < width; ++x)
4661 tPixelFunction(sourcePixel, targetPixel);
4663 sourcePixel += tSourceChannels;
4664 targetPixel += tTargetChannels;
// Branch 3: target row index equals source row index, target pixels are written from the last pixel of the row backwards.
4673 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4675 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4677 TTarget*
const targetRowBegin = target + rowIndex * targetStrideElements;
4678 TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4680 for (
unsigned int x = 0u; x < width; ++x)
4682 ocean_assert(targetPixel >= targetRowBegin);
4683 tPixelFunction(sourcePixel, targetPixel);
4685 sourcePixel += tSourceChannels;
4686 targetPixel -= tTargetChannels;
// Branch 4: source row `rowIndex` is written to target row `height - rowIndex - 1`, target pixels from the last pixel backwards.
4695 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4697 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4699 TTarget*
const targetRowBegin = target + (height - rowIndex - 1u) * targetStrideElements;
4700 TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4702 for (
unsigned int x = 0u; x < width; ++x)
4704 ocean_assert(targetPixel >= targetRowBegin);
4705 tPixelFunction(sourcePixel, targetPixel);
4707 sourcePixel += tSourceChannels;
4708 targetPixel -= tTargetChannels;
// Applies a compile-time operator to a block of rows: for every pixel, the operator receives the corresponding
// pixel of both source frames and writes one target pixel.
// source0/source1: the two input frames (both with tSourceChannels channels); target: output frame with tTargetChannels channels;
// width/height: frame dimensions in pixels; *PaddingElements: padding elements at the end of each row of the corresponding frame;
// conversionFlag: selects one of the four traversal branches; firstRow/numberRows: the block of rows to be handled.
// TIntermediate is not referenced by any line of this function visible in this listing.
// NOTE(review): the `case` labels of the switch are not part of this listing; the four branches below are
// described by what their pointer arithmetic does.
4719 template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
4720 void FrameChannels::applyBivariateOperatorSubset(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows)
4722 static_assert(tSourceChannels >= 1u,
"Invalid source channel number");
4723 static_assert(tTargetChannels >= 1u,
"Invalid target channel number");
4724 static_assert(tOperator,
"Invalid operator function");
4726 ocean_assert(source0 !=
nullptr && source1 !=
nullptr && target !=
nullptr);
// The target must not be identical to either of the sources.
4727 ocean_assert((
const void*)(source0) != (
const void*)(target));
4728 ocean_assert((
const void*)(source1) != (
const void*)(target));
4730 ocean_assert(numberRows != 0u);
4731 ocean_assert(firstRow + numberRows <= height);
// Strides in elements: payload plus padding.
4733 const unsigned int source0StrideElements = width * tSourceChannels + source0PaddingElements;
4734 const unsigned int source1StrideElements = width * tSourceChannels + source1PaddingElements;
4736 const unsigned int targetWidthElements = width * tTargetChannels;
4738 const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4740 switch (conversionFlag)
// Branch 1: target row index equals source row index, pixels are written from left to right.
4744 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4746 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4747 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4749 TTarget* rowTarget = target + rowIndex * targetStrideElements;
4750 const TTarget*
const rowTargetEnd = rowTarget + targetWidthElements;
4752 while (rowTarget != rowTargetEnd)
4754 ocean_assert(rowTarget < rowTargetEnd);
4756 tOperator(rowSource0, rowSource1, rowTarget);
4758 rowSource0 += tSourceChannels;
4759 rowSource1 += tSourceChannels;
4761 rowTarget += tTargetChannels;
// Branch 2: source row `rowIndex` is written to target row `height - rowIndex - 1`, pixels from left to right.
4770 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4772 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4773 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4775 TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements;
4776 const TTarget*
const rowTargetEnd = rowTarget + targetWidthElements;
4778 while (rowTarget != rowTargetEnd)
4780 ocean_assert(rowTarget < rowTargetEnd);
4782 tOperator(rowSource0, rowSource1, rowTarget);
4784 rowSource0 += tSourceChannels;
4785 rowSource1 += tSourceChannels;
4787 rowTarget += tTargetChannels;
// Branch 3: target row index equals source row index, target pixels are written from the last pixel of the row
// backwards; the end marker lies one pixel in front of the first pixel of the row.
4796 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4798 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4799 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4801 TTarget* rowTarget = target + rowIndex * targetStrideElements + targetWidthElements - tTargetChannels;
4802 const TTarget*
const rowTargetEnd = rowTarget - targetWidthElements;
4804 while (rowTarget != rowTargetEnd)
4806 ocean_assert(rowTarget > rowTargetEnd);
4808 tOperator(rowSource0, rowSource1, rowTarget);
4810 rowSource0 += tSourceChannels;
4811 rowSource1 += tSourceChannels;
4813 rowTarget -= tTargetChannels;
// Branch 4: source row `rowIndex` is written to target row `height - rowIndex - 1`, target pixels from the last pixel backwards.
4822 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4824 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4825 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4827 TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements + targetWidthElements - tTargetChannels;
4828 const TTarget*
const rowTargetEnd = rowTarget - targetWidthElements;
4830 while (rowTarget != rowTargetEnd)
4832 ocean_assert(rowTarget > rowTargetEnd);
4834 tOperator(rowSource0, rowSource1, rowTarget);
4836 rowSource0 += tSourceChannels;
4837 rowSource1 += tSourceChannels;
4839 rowTarget -= tTargetChannels;
// Reached for a conversion flag which is not handled by any of the branches above.
4847 ocean_assert(
false &&
"This should never happen!");
4852 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
4853 void FrameChannels::applyRowOperatorSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction,
const unsigned int firstRow,
const unsigned int numberRows)
4855 static_assert(tSourceChannels >= 1u,
"Invalid source channel number");
4856 static_assert(tTargetChannels >= 1u,
"Invalid target channel number");
4858 ocean_assert(source !=
nullptr && target !=
nullptr);
4859 ocean_assert((
const void*)source != (
const void*)target);
4861 ocean_assert(width * tSourceChannels <= sourceStrideElements);
4862 ocean_assert(width * tTargetChannels <= targetStrideElements);
4864 ocean_assert(rowOperatorFunction !=
nullptr);
4866 ocean_assert(numberRows != 0u);
4867 ocean_assert(firstRow + numberRows <= height);
4869 for (
unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4871 rowOperatorFunction(source + y * sourceStrideElements, target + y * targetStrideElements, width, height, y, sourceStrideElements, targetStrideElements);
// Converts interleaved 3-channel 8-bit pixels to a single 8-bit channel as a weighted sum of the three
// channels with 7 bit precision: target = (c0 * f0 + c1 * f1 + c2 * f2 + 64) >> 7, with f0 + f1 + f2 == 128.
// `size` is the number of target elements (pixels); the source advances by three elements per pixel.
// NOTE(review): the function's signature line is not part of this excerpt; parameter names are taken from their uses below.
4875 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
4878 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2,
"Invalid channel factors!");
4880 ocean_assert(channelMultiplicationFactors_128 !=
nullptr);
// the opaque factor pointer is read as an array of (at least) three unsigned int values
4881 const unsigned int* channelFactors_128 =
reinterpret_cast<const unsigned int*
>(channelMultiplicationFactors_128);
4882 ocean_assert(channelFactors_128 !=
nullptr);
4884 const unsigned int factorChannel0_128 = channelFactors_128[0];
4885 const unsigned int factorChannel1_128 = channelFactors_128[1];
4886 const unsigned int factorChannel2_128 = channelFactors_128[2];
// each factor is within [0, 128] and all three factors sum up to 128 (the denominator of the 7 bit precision)
4888 ocean_assert(factorChannel0_128 <= 128u && factorChannel1_128 <= 128u && factorChannel2_128 <= 128u);
4889 ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 == 128u);
// the template flags must state exactly which factors are non-zero
4891 ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
4892 ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
4893 ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
4895 ocean_assert(source !=
nullptr && target !=
nullptr && size >= 1);
// end marker used by the scalar loop handling the pixels not covered by a SIMD block
4897 const uint8_t*
const targetEnd = target + size;
4899 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// SSE path: 16 pixels per block, factors broadcast to all eight 16-bit lanes
// NOTE(review): the per-block conversion call inside the loop is not visible in this excerpt
4901 constexpr
size_t blockSize = 16;
4902 const size_t blocks = size / blockSize;
4904 const __m128i multiplicationFactors0_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel0_128));
4905 const __m128i multiplicationFactors1_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel1_128));
4906 const __m128i multiplicationFactors2_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel2_128));
4908 for (
size_t n = 0; n < blocks; ++n)
4912 source += blockSize * size_t(3);
4913 target += blockSize;
4916 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON path: 8 pixels per block, factors broadcast to all eight 8-bit lanes
4918 constexpr
size_t blockSize = 8;
4919 const size_t blocks = size / blockSize;
4921 const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
4922 const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
4923 const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
4925 for (
size_t n = 0; n < blocks; ++n)
4927 convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8);
4929 source += blockSize * size_t(3);
4930 target += blockSize;
// scalar loop: handles every pixel up to targetEnd that was not consumed by a SIMD block above
4935 while (target != targetEnd)
4937 ocean_assert(target < targetEnd);
// channels whose factor is known to be zero at compile time are skipped
4939 const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
4940 const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
4941 const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
// +64 rounds to nearest before the division by 128
4943 *target++ = (uint8_t)((channel0 + channel1 + channel2 + 64u) >> 7u);
// Converts interleaved 4-channel 8-bit pixels to a single 8-bit channel as a weighted sum of the four
// channels with 7 bit precision: target = (c0 * f0 + c1 * f1 + c2 * f2 + c3 * f3 + 64) >> 7, with f0 + f1 + f2 + f3 == 128.
// `size` is the number of target elements (pixels); the source advances by four elements per pixel.
// NOTE(review): the function's signature line is not part of this excerpt; parameter names are taken from their uses below.
4948 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
4951 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3,
"Invalid channel factors!");
4953 ocean_assert(channelMultiplicationFactors_128 !=
nullptr);
// the opaque factor pointer is read as an array of (at least) four unsigned int values
4954 const unsigned int* channelFactors_128 =
reinterpret_cast<const unsigned int*
>(channelMultiplicationFactors_128);
4955 ocean_assert(channelFactors_128 !=
nullptr);
4957 const unsigned int factorChannel0_128 = channelFactors_128[0];
4958 const unsigned int factorChannel1_128 = channelFactors_128[1];
4959 const unsigned int factorChannel2_128 = channelFactors_128[2];
4960 const unsigned int factorChannel3_128 = channelFactors_128[3];
// each factor is limited to 127 here (the SSE path below packs the four factors into single bytes of a 32-bit value),
// and all four factors sum up to 128
4962 ocean_assert(factorChannel0_128 <= 127u && factorChannel1_128 <= 127u && factorChannel2_128 <= 127u && factorChannel3_128 <= 127u);
4963 ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 + factorChannel3_128 == 128u);
// the template flags must state exactly which factors are non-zero
4965 ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
4966 ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
4967 ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
4968 ocean_assert(tUseFactorChannel3 == (factorChannel3_128 != 0u));
4970 ocean_assert(source !=
nullptr && target !=
nullptr && size >= 1);
// end marker used by the scalar loop handling the pixels not covered by a SIMD block
4972 const uint8_t*
const targetEnd = target + size;
4974 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// SSE path: 16 pixels per block; the four factors are packed as bytes (channel 0 in the lowest byte)
// and the packed 32-bit value is broadcast to all four lanes
// NOTE(review): the per-block conversion call inside the loop is not visible in this excerpt
4976 constexpr
size_t blockSize = 16;
4977 const size_t blocks = size / blockSize;
4979 const __m128i m128_multiplicationFactors = _mm_set1_epi32(
int(factorChannel0_128 | (factorChannel1_128 << 8u) | (factorChannel2_128 << 16u) | (factorChannel3_128 << 24u)));
4981 for (
size_t n = 0; n < blocks; ++n)
4985 source += blockSize * size_t(4);
4986 target += blockSize;
4989 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON path: 8 pixels per block, factors broadcast to all eight 8-bit lanes
4991 constexpr
size_t blockSize = 8;
4992 const size_t blocks = size / blockSize;
4994 const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
4995 const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
4996 const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
4997 const uint8x8_t factorChannel3_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel3_128);
4999 for (
size_t n = 0; n < blocks; ++n)
5001 convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2, tUseFactorChannel3>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8, factorChannel3_128_u_8x8);
5003 source += blockSize * size_t(4);
5004 target += blockSize;
// scalar loop: handles every pixel up to targetEnd that was not consumed by a SIMD block above
5009 while (target != targetEnd)
5011 ocean_assert(target < targetEnd);
// channels whose factor is known to be zero at compile time are skipped
5013 const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
5014 const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
5015 const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
5016 const unsigned int channel3 = tUseFactorChannel3 ? (source[3] * factorChannel3_128) : 0u;
// +64 rounds to nearest before the division by 128
5018 *target++ = (uint8_t)((channel0 + channel1 + channel2 + channel3 + 64u) >> 7u);
5023 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5026 static_assert(tChannels >= 2u,
"Invalid channel number!");
5027 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5029 ocean_assert(frame !=
nullptr);
5030 ocean_assert(width >= 1u);
5032 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
5034 uint8_t* frameRow = frame + frameStrideElements * firstRow;
5036 for (
unsigned int y = 0u; y < numberRows; ++y)
5038 for (
unsigned int x = 0u; x < width; ++x)
5040 if (frameRow[tAlphaChannelIndex])
5042 const uint8_t alpha_2 = frameRow[tAlphaChannelIndex] / 2u;
5044 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5046 if (channelIndex != tAlphaChannelIndex)
5048 frameRow[channelIndex] = uint8_t(std::min((frameRow[channelIndex] * 255u + alpha_2) / frameRow[tAlphaChannelIndex], 255u));
5053 frameRow += tChannels;
5056 frameRow += framePaddingElements;
5060 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5063 static_assert(tChannels >= 2u,
"Invalid channel number!");
5064 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5066 ocean_assert(source !=
nullptr && target !=
nullptr);
5067 ocean_assert(width >= 1u);
5069 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5070 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
5072 const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5073 uint8_t* targetRow = target + targetStrideElements * firstRow;
5075 for (
unsigned int y = 0u; y < numberRows; ++y)
5077 for (
unsigned int x = 0u; x < width; ++x)
5079 if (sourceRow[tAlphaChannelIndex])
5081 const uint8_t alpha_2 = sourceRow[tAlphaChannelIndex] / 2u;
5083 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5085 if (channelIndex != tAlphaChannelIndex)
5087 targetRow[channelIndex] = uint8_t(std::max((sourceRow[channelIndex] * 255u + alpha_2) / sourceRow[tAlphaChannelIndex], 255u));
5091 targetRow[channelIndex] = sourceRow[channelIndex];
5097 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5099 targetRow[channelIndex] = sourceRow[channelIndex];
5103 sourceRow += tChannels;
5104 targetRow += tChannels;
5107 sourceRow += sourcePaddingElements;
5108 targetRow += targetPaddingElements;
5112 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5115 static_assert(tChannels >= 2u,
"Invalid channel number!");
5116 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5118 ocean_assert(frame !=
nullptr);
5119 ocean_assert(width >= 1u);
5121 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
5123 uint8_t* frameRow = frame + frameStrideElements * firstRow;
5125 for (
unsigned int y = 0u; y < numberRows; ++y)
5127 for (
unsigned int x = 0u; x < width; ++x)
5129 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5131 if (channelIndex != tAlphaChannelIndex)
5133 frameRow[channelIndex] = (frameRow[channelIndex] * frameRow[tAlphaChannelIndex] + 127u) / 255u;
5137 frameRow += tChannels;
5140 frameRow += framePaddingElements;
5144 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5147 static_assert(tChannels >= 2u,
"Invalid channel number!");
5148 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5150 ocean_assert(source !=
nullptr && target !=
nullptr);
5151 ocean_assert(width >= 1u);
5153 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5154 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
5156 const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5157 uint8_t* targetRow = target + targetStrideElements * firstRow;
5159 for (
unsigned int y = 0u; y < numberRows; ++y)
5161 for (
unsigned int x = 0u; x < width; ++x)
5163 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5165 if (channelIndex != tAlphaChannelIndex)
5167 targetRow[channelIndex] = (sourceRow[channelIndex] * sourceRow[tAlphaChannelIndex] + 127u) / 255u;
5171 targetRow[channelIndex] = sourceRow[channelIndex];
5175 sourceRow += tChannels;
5176 targetRow += tChannels;
5179 sourceRow += sourcePaddingElements;
5180 targetRow += targetPaddingElements;
5184 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// SSE kernel body: reads 48 source bytes (three 128-bit loads) and writes 16 target bytes, each the
// weighted sum of three channel values with 7 bit precision.
// NOTE(review): the signature and the statements that de-interleave the loaded data into channel0/1/2_u_8x16
// and derive the channelN_low_u_8x16 values are not part of this excerpt.
5188 ocean_assert(source !=
nullptr && target !=
nullptr);
// 0x0040 (64) in every 16-bit lane: rounding term for the later shift by 7
5205 const __m128i constant64_u_16x8 = _mm_set1_epi32(0x00400040);
// three unaligned 16-byte loads
5207 const __m128i sourceA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5208 const __m128i sourceB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5209 const __m128i sourceC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5211 __m128i channel0_u_8x16;
5212 __m128i channel1_u_8x16;
5213 __m128i channel2_u_8x16;
// the upper byte of every 16-bit lane, moved down so that it can be multiplied as a 16-bit value
5222 const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5223 const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5224 const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
// per-channel 16-bit products with the corresponding factor
5228 const __m128i result0_low_u_8x16 = _mm_mullo_epi16(channel0_low_u_8x16, multiplicationFactors0_128_u_16x8);
5229 const __m128i result0_high_u_8x16 = _mm_mullo_epi16(channel0_high_u_8x16, multiplicationFactors0_128_u_16x8);
5231 const __m128i result1_low_u_8x16 = _mm_mullo_epi16(channel1_low_u_8x16, multiplicationFactors1_128_u_16x8);
5232 const __m128i result1_high_u_8x16 = _mm_mullo_epi16(channel1_high_u_8x16, multiplicationFactors1_128_u_16x8);
5234 const __m128i result2_low_u_8x16 = _mm_mullo_epi16(channel2_low_u_8x16, multiplicationFactors2_128_u_16x8);
5235 const __m128i result2_high_u_8x16 = _mm_mullo_epi16(channel2_high_u_8x16, multiplicationFactors2_128_u_16x8);
// sum of the three products plus 64, using saturating unsigned 16-bit additions
5238 const __m128i result128_low_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_low_u_8x16, result1_low_u_8x16), _mm_adds_epu16(result2_low_u_8x16, constant64_u_16x8));
5239 const __m128i result128_high_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_high_u_8x16, result1_high_u_8x16), _mm_adds_epu16(result2_high_u_8x16, constant64_u_16x8));
// division by 128
5242 const __m128i result_low_u_8x16 = _mm_srli_epi16(result128_low_u_8x16, 7);
5243 const __m128i result_high_u_8x16 = _mm_srli_epi16(result128_high_u_8x16, 7);
// re-interleave: 'low' results stay in the lower byte of each 16-bit lane, 'high' results move to the upper byte
5246 const __m128i result_u_8x16 = _mm_or_si128(result_low_u_8x16, _mm_slli_epi16(result_high_u_8x16, 8));
// unaligned store of the 16 result bytes
5249 _mm_storeu_si128((__m128i*)target, result_u_8x16);
// SSE kernel: reads 48 source bytes (three 128-bit loads) and writes 48 target bytes (three 128-bit stores),
// each target channel being a linear combination of the three source channels.
// In the multiply-accumulate below, factorChannelXY is the weight of source channel Y for result X
// (result0 uses 00/01/02, result1 uses 10/11/12, result2 uses 20/21/22).
// NOTE(review): several statements are not part of this excerpt: the de-interleaving into channelN_u_8x16, the
// derivation of channelN_low_u_8x16, whatever happens between the multiply-accumulate and the clamp (the bias
// parameters are not referenced in the visible lines), and the interleaving into resultA/B/C_u_8x16.
5252 OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_128_s_16x8,
const __m128i& factorChannel10_128_s_16x8,
const __m128i& factorChannel20_128_s_16x8,
const __m128i& factorChannel01_128_s_16x8,
const __m128i& factorChannel11_128_s_16x8,
const __m128i& factorChannel21_128_s_16x8,
const __m128i& factorChannel02_128_s_16x8,
const __m128i& factorChannel12_128_s_16x8,
const __m128i& factorChannel22_128_s_16x8,
const __m128i& biasChannel0_s_16x8,
const __m128i& biasChannel1_s_16x8,
const __m128i& biasChannel2_s_16x8)
5254 ocean_assert(source !=
nullptr && target !=
nullptr);
// three unaligned 16-byte loads
5273 const __m128i sourceA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5274 const __m128i sourceB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5275 const __m128i sourceC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5277 __m128i channel0_u_8x16;
5278 __m128i channel1_u_8x16;
5279 __m128i channel2_u_8x16;
// the upper byte of every 16-bit lane, moved down so that it can be multiplied as a 16-bit value
5288 const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5289 const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5290 const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
// 16-bit multiply-accumulate for the 'low' half: resultX = c0 * fX0 + c1 * fX1 + c2 * fX2
5294 __m128i result0_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel02_128_s_16x8));
5295 __m128i result1_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel12_128_s_16x8));
5296 __m128i result2_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel22_128_s_16x8));
// the same multiply-accumulate for the 'high' half
5298 __m128i result0_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel02_128_s_16x8));
5299 __m128i result1_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel12_128_s_16x8));
5300 __m128i result2_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel22_128_s_16x8));
// clamp every signed 16-bit lane to [0, 255] so that it fits into one byte
5314 const __m128i constant255_s_16x8 = _mm_set1_epi16(255);
5316 result0_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5317 result1_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5318 result2_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5320 result0_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5321 result1_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5322 result2_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
// re-combine per channel: 'low' results in the lower byte of each 16-bit lane, 'high' results in the upper byte
5325 const __m128i result0_u_8x16 = _mm_or_si128(result0_low_u_8x16, _mm_slli_epi16(result0_high_u_8x16, 8));
5326 const __m128i result1_u_8x16 = _mm_or_si128(result1_low_u_8x16, _mm_slli_epi16(result1_high_u_8x16, 8));
5327 const __m128i result2_u_8x16 = _mm_or_si128(result2_low_u_8x16, _mm_slli_epi16(result2_high_u_8x16, 8));
5329 __m128i resultA_u_8x16;
5330 __m128i resultB_u_8x16;
5331 __m128i resultC_u_8x16;
// three unaligned 16-byte stores
5335 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5336 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5337 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
// SSE kernel: reads 48 source bytes (three 128-bit loads) and writes 48 target bytes (three 128-bit stores).
// Intermediate results are kept in 32-bit lanes (four result registers per output channel) before they are
// merged to 16-bit lanes and packed to bytes with unsigned saturation.
// NOTE(review): several statements are not part of this excerpt: the de-interleaving into channelN_u_8x16, the
// computation of the resultN_{low,high}_{A,B}_s_32x4 values (including any use of the factor and bias
// parameters), and the interleaving into resultA/B/C_u_8x16.
5340 OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_1024_s_16x8,
const __m128i& factorChannel10_1024_s_16x8,
const __m128i& factorChannel20_1024_s_16x8,
const __m128i& factorChannel01_1024_s_16x8,
const __m128i& factorChannel11_1024_s_16x8,
const __m128i& factorChannel21_1024_s_16x8,
const __m128i& factorChannel02_1024_s_16x8,
const __m128i& factorChannel12_1024_s_16x8,
const __m128i& factorChannel22_1024_s_16x8,
const __m128i& biasChannel0_1024_s_32x4,
const __m128i& biasChannel1_1024_s_32x4,
const __m128i& biasChannel2_1024_s_32x4)
5342 ocean_assert(source !=
nullptr && target !=
nullptr);
// three unaligned 16-byte loads
5362 const __m128i sourceA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5363 const __m128i sourceB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5364 const __m128i sourceC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5366 __m128i channel0_u_8x16;
5367 __m128i channel1_u_8x16;
5368 __m128i channel2_u_8x16;
// the upper byte of every 16-bit lane, moved down into a 16-bit value
5378 const __m128i channel0_high_u_16x8 = _mm_srli_epi16(channel0_u_8x16, 8);
5379 const __m128i channel1_high_u_16x8 = _mm_srli_epi16(channel1_u_8x16, 8);
5380 const __m128i channel2_high_u_16x8 = _mm_srli_epi16(channel2_u_8x16, 8);
// 32-bit intermediate results for output channel 0
5385 __m128i result0_low_A_s_32x4;
5386 __m128i result0_low_B_s_32x4;
5387 __m128i result0_high_A_s_32x4;
5388 __m128i result0_high_B_s_32x4;
// 32-bit intermediate results for output channel 1
5405 __m128i result1_low_A_s_32x4;
5406 __m128i result1_low_B_s_32x4;
5407 __m128i result1_high_A_s_32x4;
5408 __m128i result1_high_B_s_32x4;
// 32-bit intermediate results for output channel 2
5425 __m128i result2_low_A_s_32x4;
5426 __m128i result2_low_B_s_32x4;
5427 __m128i result2_high_A_s_32x4;
5428 __m128i result2_high_B_s_32x4;
// merge two 32-bit registers into one register of 16-bit lanes: the lower 16 bits of each 'low' lane are
// kept, the 'high' lane is shifted into the upper 16 bits
5448 const __m128i mask_0000FFFF_32x4 = _mm_set1_epi32(0x0000FFFF);
5450 __m128i result0_A_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_A_s_32x4, 16));
5451 __m128i result0_B_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_B_s_32x4, 16));
5453 __m128i result1_A_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_A_s_32x4, 16));
5454 __m128i result1_B_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_B_s_32x4, 16));
5456 __m128i result2_A_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_A_s_32x4, 16));
5457 __m128i result2_B_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_B_s_32x4, 16));
// pack the signed 16-bit lanes to unsigned bytes with saturation to [0, 255]
5462 const __m128i result0_u_8x16 = _mm_packus_epi16(result0_A_s_16x8, result0_B_s_16x8);
5463 const __m128i result1_u_8x16 = _mm_packus_epi16(result1_A_s_16x8, result1_B_s_16x8);
5464 const __m128i result2_u_8x16 = _mm_packus_epi16(result2_A_s_16x8, result2_B_s_16x8);
5466 __m128i resultA_u_8x16;
5467 __m128i resultB_u_8x16;
5468 __m128i resultC_u_8x16;
// three unaligned 16-byte stores
5472 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5473 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5474 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
// SSE kernel body: reads 64 source bytes (four 128-bit loads, i.e. sixteen groups of four bytes) and
// writes 16 target bytes, each the weighted sum of one four-byte group with 7 bit precision.
// NOTE(review): the signature is not part of this excerpt; multiplicationFactors0123_128_s_32x4 is not
// declared in the visible lines and is presumably a parameter.
5479 ocean_assert(source !=
nullptr && target !=
nullptr);
// 0x0040 (64) in every 16-bit lane: rounding term for the later shift by 7
5502 const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
// four unaligned 16-byte loads
5504 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5505 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5506 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5507 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 3);
// multiply the unsigned source bytes with the signed factor bytes and add adjacent pairs into 16-bit lanes
// (two partial sums per four-byte group)
5512 const __m128i intermediateResults0_u_16x8 = _mm_maddubs_epi16(pixelsA_u_8x16, multiplicationFactors0123_128_s_32x4);
5513 const __m128i intermediateResults1_u_16x8 = _mm_maddubs_epi16(pixelsB_u_8x16, multiplicationFactors0123_128_s_32x4);
5514 const __m128i intermediateResults2_u_16x8 = _mm_maddubs_epi16(pixelsC_u_8x16, multiplicationFactors0123_128_s_32x4);
5515 const __m128i intermediateResults3_u_16x8 = _mm_maddubs_epi16(pixelsD_u_8x16, multiplicationFactors0123_128_s_32x4);
// horizontal add of adjacent 16-bit lanes: one complete sum per four-byte group
5518 __m128i grayA_u_16x8 = _mm_hadd_epi16(intermediateResults0_u_16x8, intermediateResults1_u_16x8);
5519 __m128i grayB_u_16x8 = _mm_hadd_epi16(intermediateResults2_u_16x8, intermediateResults3_u_16x8);
// add 64 for rounding
5522 grayA_u_16x8 = _mm_add_epi16(grayA_u_16x8, constant64_u_8x16);
5523 grayB_u_16x8 = _mm_add_epi16(grayB_u_16x8, constant64_u_8x16);
// division by 128
5526 grayA_u_16x8 = _mm_srli_epi16(grayA_u_16x8, 7);
5527 grayB_u_16x8 = _mm_srli_epi16(grayB_u_16x8, 7);
// pack the sixteen 16-bit results to bytes with unsigned saturation
5534 const __m128i gray_u_8x16 = _mm_packus_epi16(grayA_u_16x8, grayB_u_16x8);
// unaligned store of the 16 result bytes
5537 _mm_storeu_si128((__m128i*)target, gray_u_8x16);
// SSE kernel body: reads 64 source bytes (four 128-bit loads, i.e. sixteen groups of four bytes) and
// writes 32 target bytes (two 128-bit stores): two interleaved results per four-byte group, each a weighted
// sum of the group with 7 bit precision.
// NOTE(review): the signature is not part of this excerpt; multiplicationFactorsChannel0_0123_128_s_16x8 and
// multiplicationFactorsChannel1_0123_128_s_16x8 are not declared in the visible lines and are presumably parameters.
5542 ocean_assert(source !=
nullptr && target !=
nullptr);
// 0x0040 (64) in every 16-bit lane: rounding term for the later shift by 7
5563 const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
// four unaligned 16-byte loads
5565 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5566 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5567 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5568 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 3);
// zero-extend all 64 source bytes to 16-bit lanes (lower/upper half of each loaded register)
5572 const __m128i pixelsA_u_16x8 = _mm_unpacklo_epi8(pixelsA_u_8x16, _mm_setzero_si128());
5573 const __m128i pixelsB_u_16x8 = _mm_unpackhi_epi8(pixelsA_u_8x16, _mm_setzero_si128());
5575 const __m128i pixelsC_u_16x8 = _mm_unpacklo_epi8(pixelsB_u_8x16, _mm_setzero_si128());
5576 const __m128i pixelsD_u_16x8 = _mm_unpackhi_epi8(pixelsB_u_8x16, _mm_setzero_si128());
5578 const __m128i pixelsE_u_16x8 = _mm_unpacklo_epi8(pixelsC_u_8x16, _mm_setzero_si128());
5579 const __m128i pixelsF_u_16x8 = _mm_unpackhi_epi8(pixelsC_u_8x16, _mm_setzero_si128());
5581 const __m128i pixelsG_u_16x8 = _mm_unpacklo_epi8(pixelsD_u_8x16, _mm_setzero_si128());
5582 const __m128i pixelsH_u_16x8 = _mm_unpackhi_epi8(pixelsD_u_8x16, _mm_setzero_si128());
// first target channel: multiply 16-bit lanes with the factors and add adjacent pairs into 32-bit lanes
5588 const __m128i intermediateResultsChannel0_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5589 const __m128i intermediateResultsChannel0_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5590 const __m128i intermediateResultsChannel0_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5591 const __m128i intermediateResultsChannel0_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5592 const __m128i intermediateResultsChannel0_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5593 const __m128i intermediateResultsChannel0_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5594 const __m128i intermediateResultsChannel0_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5595 const __m128i intermediateResultsChannel0_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
// horizontal add of adjacent 32-bit lanes: one complete sum per four-byte group
5597 const __m128i resultsChannel0_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_0_u_32x4, intermediateResultsChannel0_1_u_32x4);
5598 const __m128i resultsChannel0_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_2_u_32x4, intermediateResultsChannel0_3_u_32x4);
5599 const __m128i resultsChannel0_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_4_u_32x4, intermediateResultsChannel0_5_u_32x4);
5600 const __m128i resultsChannel0_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_6_u_32x4, intermediateResultsChannel0_7_u_32x4);
// second target channel: same scheme with the second set of factors
5603 const __m128i intermediateResultsChannel1_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5604 const __m128i intermediateResultsChannel1_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5605 const __m128i intermediateResultsChannel1_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5606 const __m128i intermediateResultsChannel1_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5607 const __m128i intermediateResultsChannel1_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5608 const __m128i intermediateResultsChannel1_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5609 const __m128i intermediateResultsChannel1_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5610 const __m128i intermediateResultsChannel1_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5612 const __m128i resultsChannel1_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_0_u_32x4, intermediateResultsChannel1_1_u_32x4);
5613 const __m128i resultsChannel1_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_2_u_32x4, intermediateResultsChannel1_3_u_32x4);
5614 const __m128i resultsChannel1_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_4_u_32x4, intermediateResultsChannel1_5_u_32x4);
5615 const __m128i resultsChannel1_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_6_u_32x4, intermediateResultsChannel1_7_u_32x4);
// interleave both channels as 16-bit lanes: first-channel sum in the lower half of each 32-bit lane,
// second-channel sum shifted into the upper half
5619 __m128i resultA_u_16x8 = _mm_or_si128(resultsChannel0_A_u_32x4, _mm_slli_epi32(resultsChannel1_A_u_32x4, 16));
5620 __m128i resultB_u_16x8 = _mm_or_si128(resultsChannel0_B_u_32x4, _mm_slli_epi32(resultsChannel1_B_u_32x4, 16));
5621 __m128i resultC_u_16x8 = _mm_or_si128(resultsChannel0_C_u_32x4, _mm_slli_epi32(resultsChannel1_C_u_32x4, 16));
5622 __m128i resultD_u_16x8 = _mm_or_si128(resultsChannel0_D_u_32x4, _mm_slli_epi32(resultsChannel1_D_u_32x4, 16));
// add 64 for rounding
5625 resultA_u_16x8 = _mm_add_epi16(resultA_u_16x8, constant64_u_8x16);
5626 resultB_u_16x8 = _mm_add_epi16(resultB_u_16x8, constant64_u_8x16);
5627 resultC_u_16x8 = _mm_add_epi16(resultC_u_16x8, constant64_u_8x16);
5628 resultD_u_16x8 = _mm_add_epi16(resultD_u_16x8, constant64_u_8x16);
// division by 128
5631 resultA_u_16x8 = _mm_srli_epi16(resultA_u_16x8, 7);
5632 resultB_u_16x8 = _mm_srli_epi16(resultB_u_16x8, 7);
5633 resultC_u_16x8 = _mm_srli_epi16(resultC_u_16x8, 7);
5634 resultD_u_16x8 = _mm_srli_epi16(resultD_u_16x8, 7);
// pack the 16-bit results to bytes with unsigned saturation
5641 const __m128i resultAB_u_8x16 = _mm_packus_epi16(resultA_u_16x8, resultB_u_16x8);
5642 const __m128i resultCD_u_8x16 = _mm_packus_epi16(resultC_u_16x8, resultD_u_16x8);
// two unaligned 16-byte stores
5645 _mm_storeu_si128((__m128i*)target + 0, resultAB_u_8x16);
5646 _mm_storeu_si128((__m128i*)target + 1, resultCD_u_8x16);
5651 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON kernel: reads eight interleaved 3-channel 8-bit pixels (24 bytes) and writes eight 8-bit results,
// each the weighted sum of the three channels with 7 bit precision.
// Channels whose template flag is false are left out of the sum at compile time.
// NOTE(review): the function's signature line is not part of this excerpt; parameter names are taken from their uses below.
5653 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
5656 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2,
"Invalid multiplication factors!");
5658 ocean_assert(source !=
nullptr && target !=
nullptr);
// de-interleaving load: val[0], val[1], val[2] hold the eight values of channel 0, 1 and 2
5677 uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5679 uint16x8_t intermediateResults_u_16x8;
// the accumulator starts either with the widened product of channel 0 or with zero
5683 if constexpr (tUseFactorChannel0)
5685 intermediateResults_u_16x8 = vmull_u8(source_u_8x8x3.val[0], factorChannel0_128_u_8x8);
5689 intermediateResults_u_16x8 = vdupq_n_u16(0u);
// widening multiply-accumulate of channel 1
5694 if constexpr (tUseFactorChannel1)
5696 intermediateResults_u_16x8 = vmlal_u8(intermediateResults_u_16x8, source_u_8x8x3.val[1], factorChannel1_128_u_8x8);
// widening multiply-accumulate of channel 2
5701 if constexpr (tUseFactorChannel2)
5703 intermediateResults_u_16x8 = vmlal_u8(intermediateResults_u_16x8, source_u_8x8x3.val[2], factorChannel2_128_u_8x8);
// rounding right shift by 7 (division by 128) with saturating narrowing to 8 bit
5707 uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_u_16x8, 7);
// store the eight result bytes
5710 vst1_u8(target, results_u_8x8);
// NEON kernel: converts 8 interleaved 3-channel 8-bit pixels to 8 interleaved 3-channel 8-bit pixels
// with a 3x3 fixed-point factor matrix and a per-source-channel bias.
// For each target channel t the code below computes
//   target_t = saturate_u8(round((f_t0 * (s0 - b0) + f_t1 * (s1 - b1) + f_t2 * (s2 - b2)) / 64))
// where factorChannelTS_64_s_16x8 is the factor for target channel T and source channel S
// (the _64 suffix matches the final right shift by 6).
// source: 24 readable bytes (8 pixels); target: 24 writable bytes (8 pixels).
// biasChannelS_u_8x8 is subtracted from source channel S before the multiplication.
5713 OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8)
5715 ocean_assert(source !=
nullptr && target !=
nullptr);
// De-interleaving load: val[0..2] each hold one source channel of the 8 pixels.
5735 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
// Widening subtraction of the bias; the u16 result wraps modulo 2^16, so reinterpreting it as s16
// yields the signed difference (range [-255, 255]).
5738 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[0], biasChannel0_u_8x8));
5739 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[1], biasChannel1_u_8x8));
5740 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[2], biasChannel2_u_8x8));
// Contribution of source channel 0 to each target channel; vmulq_s16 keeps only the low 16 bits of each
// product, so the factors must be small enough for the products to fit into 16 bits.
5744 int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_64_s_16x8);
5745 int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_64_s_16x8);
5746 int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_64_s_16x8);
// Contribution of source channel 1, accumulated with a saturating add (clamps instead of wrapping).
5748 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, vmulq_s16(source1_s_16x8, factorChannel01_64_s_16x8));
5749 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, vmulq_s16(source1_s_16x8, factorChannel11_64_s_16x8));
5750 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, vmulq_s16(source1_s_16x8, factorChannel21_64_s_16x8));
// Contribution of source channel 2, again with a saturating add.
5752 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, vmulq_s16(source2_s_16x8, factorChannel02_64_s_16x8));
5753 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, vmulq_s16(source2_s_16x8, factorChannel12_64_s_16x8));
5754 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, vmulq_s16(source2_s_16x8, factorChannel22_64_s_16x8));
5756 uint8x8x3_t results_u_8x8x3;
// Rounding right shift by 6 (division by 64) with saturating narrowing from signed 16 bit to unsigned 8 bit
// (negative sums become 0, sums above 255 become 255).
5759 results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 6);
5760 results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 6);
5761 results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 6);
// Interleaving store of the three target channels (24 bytes).
5764 vst3_u8(target, results_u_8x8x3);
// NEON kernel: converts 16 interleaved 3-channel 8-bit pixels to 16 interleaved 3-channel 8-bit pixels
// with a 3x3 fixed-point factor matrix and a per-source-channel bias.
// For each target channel t the code below computes
//   target_t = saturate_u8(round((f_t0 * (s0 - b0) + f_t1 * (s1 - b1) + f_t2 * (s2 - b2)) / 64))
// where factorChannelTS_64_s_16x8 is the factor for target channel T and source channel S
// (the _64 suffix matches the final right shift by 6).
// The 16 pixels are processed as two halves of 8 pixels ("low" and "high") sharing the same factors and biases.
// source: 48 readable bytes (16 pixels); target: 48 writable bytes (16 pixels).
5767 OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8)
5769 ocean_assert(source !=
nullptr && target !=
nullptr);
// De-interleaving load: val[0..2] each hold one source channel of the 16 pixels.
5784 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
// Pixels 0-7: widening subtraction of the bias; the wrapped u16 difference reinterpreted as s16
// is the signed difference (range [-255, 255]).
5787 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
5788 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
5789 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
// Pixels 8-15: same bias subtraction for the upper half.
5791 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
5792 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
5793 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
// Contribution of source channel 0 (low half); vmulq_s16 keeps only the low 16 bits of each product.
5797 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
5798 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
5799 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
// Contribution of source channel 0 (high half).
5801 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
5802 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
5803 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
// Contribution of source channel 1 (low half), accumulated with a saturating add.
5805 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8));
5806 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
5807 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
// Contribution of source channel 1 (high half).
5809 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
5810 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
5811 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
// Contribution of source channel 2 (low half), accumulated with a saturating add.
5813 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
5814 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
5815 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
// Contribution of source channel 2 (high half).
5817 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
5818 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
5819 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
5821 uint8x16x3_t results_u_8x16x3;
// Rounding right shift by 6 (division by 64) with saturating narrowing to unsigned 8 bit,
// then the low and high halves are joined back into one 16-lane vector per target channel.
5824 results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
5825 results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
5826 results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
// Interleaving store of the three target channels (48 bytes).
5829 vst3q_u8(target, results_u_8x16x3);
// NEON kernel: converts 8 interleaved 3-channel 8-bit pixels to 8 interleaved 3-channel 8-bit pixels
// with a 3x3 fixed-point factor matrix and a per-target-channel bias that is added before the final shift.
// For each target channel t the code below computes
//   target_t = saturate_u8(round((f_t0 * s0 + f_t1 * s1 + f_t2 * s2 + bias_t) / 128))
// where factorChannelTS_128_s_16x8 is the factor for target channel T and source channel S.
// Because the bias is added before the right shift by 7, it is expressed in the same x128 scale as the products.
// source: 24 readable bytes (8 pixels); target: 24 writable bytes (8 pixels).
5832 OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8)
5834 ocean_assert(source !=
nullptr && target !=
nullptr);
// De-interleaving load: val[0..2] each hold one source channel of the 8 pixels.
5854 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
// Zero-extend each channel from 8 to 16 bit; values stay in [0, 255], so the signed reinterpretation is lossless.
5856 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
5857 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
5858 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
// Contribution of source channel 0; vmulq_s16 keeps only the low 16 bits of each product.
5860 int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_128_s_16x8);
5861 int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_128_s_16x8);
5862 int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_128_s_16x8);
// Contribution of source channel 1; vmlaq_s16 is a non-saturating (wrapping) multiply-accumulate,
// so the weighted sum itself is expected to stay within the signed 16-bit range.
5864 intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source1_s_16x8, factorChannel01_128_s_16x8);
5865 intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source1_s_16x8, factorChannel11_128_s_16x8);
5866 intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source1_s_16x8, factorChannel21_128_s_16x8);
// Contribution of source channel 2 (wrapping multiply-accumulate).
5868 intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source2_s_16x8, factorChannel02_128_s_16x8);
5869 intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source2_s_16x8, factorChannel12_128_s_16x8);
5870 intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source2_s_16x8, factorChannel22_128_s_16x8);
// Add the per-target-channel bias with a saturating add (clamps at the signed 16-bit limits instead of wrapping).
5874 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, biasChannel0_128_s_16x8);
5875 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, biasChannel1_128_s_16x8);
5876 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, biasChannel2_128_s_16x8);
5878 uint8x8x3_t results_u_8x8x3;
// Rounding right shift by 7 (division by 128) with saturating narrowing from signed 16 bit to unsigned 8 bit.
5881 results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 7);
5882 results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 7);
5883 results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 7);
// Interleaving store of the three target channels (24 bytes).
5886 vst3_u8(target, results_u_8x8x3);
// NEON kernel: converts 8 interleaved 3-channel 8-bit pixels to 8 interleaved 3-channel 8-bit pixels
// with a 3x3 fixed-point factor matrix and a per-target-channel bias, using 32-bit intermediate sums.
// For each target channel t the code below computes
//   target_t = saturate_u8(round((f_t0 * s0 + f_t1 * s1 + f_t2 * s2 + bias_t) / 1024))
// where factorChannelTS_1024_s_16x4 is the factor for target channel T and source channel S.
// Because the bias is added before the right shift by 10, it is expressed in the same x1024 scale as the products.
// The 8 pixels are processed as two groups of 4 ("low" and "high") because each 32-bit vector holds 4 lanes.
// source: 24 readable bytes (8 pixels); target: 24 writable bytes (8 pixels).
5889 OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4)
5891 ocean_assert(source !=
nullptr && target !=
nullptr);
// De-interleaving load: val[0..2] each hold one source channel of the 8 pixels.
5912 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
// Zero-extend each channel from 8 to 16 bit; values stay in [0, 255], so the signed reinterpretation is lossless.
5914 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
5915 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
5916 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
// Source channel 0, split into pixels 0-3 (low) and 4-7 (high).
5918 const int16x4_t source0_low_s_16x4 = vget_low_s16(source0_s_16x8);
5919 const int16x4_t source0_high_s_16x4 = vget_high_s16(source0_s_16x8);
// Widening multiply (s16 * s16 -> s32) initializes the accumulators of all three target channels
// with the contribution of source channel 0.
5921 int32x4_t intermediateResults0_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel00_1024_s_16x4);
5922 int32x4_t intermediateResults0_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel00_1024_s_16x4);
5924 int32x4_t intermediateResults1_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel10_1024_s_16x4);
5925 int32x4_t intermediateResults1_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel10_1024_s_16x4);
5927 int32x4_t intermediateResults2_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel20_1024_s_16x4);
5928 int32x4_t intermediateResults2_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel20_1024_s_16x4);
// Source channel 1, split into low and high halves.
5931 const int16x4_t source1_low_s_16x4 = vget_low_s16(source1_s_16x8);
5932 const int16x4_t source1_high_s_16x4 = vget_high_s16(source1_s_16x8);
// Widening multiply-accumulate of the source channel 1 contribution.
5934 intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source1_low_s_16x4, factorChannel01_1024_s_16x4);
5935 intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source1_high_s_16x4, factorChannel01_1024_s_16x4);
5937 intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source1_low_s_16x4, factorChannel11_1024_s_16x4);
5938 intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source1_high_s_16x4, factorChannel11_1024_s_16x4);
5940 intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source1_low_s_16x4, factorChannel21_1024_s_16x4);
5941 intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source1_high_s_16x4, factorChannel21_1024_s_16x4);
// Source channel 2, split into low and high halves.
5944 const int16x4_t source2_low_s_16x4 = vget_low_s16(source2_s_16x8);
5945 const int16x4_t source2_high_s_16x4 = vget_high_s16(source2_s_16x8);
// Widening multiply-accumulate of the source channel 2 contribution.
5947 intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source2_low_s_16x4, factorChannel02_1024_s_16x4);
5948 intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source2_high_s_16x4, factorChannel02_1024_s_16x4);
5950 intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source2_low_s_16x4, factorChannel12_1024_s_16x4);
5951 intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source2_high_s_16x4, factorChannel12_1024_s_16x4);
5953 intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source2_low_s_16x4, factorChannel22_1024_s_16x4);
5954 intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source2_high_s_16x4, factorChannel22_1024_s_16x4);
// Add the per-target-channel bias (plain 32-bit add) to both halves.
5959 intermediateResults0_low_s_32x4 = vaddq_s32(intermediateResults0_low_s_32x4, biasChannel0_1024_s_32x4);
5960 intermediateResults0_high_s_32x4 = vaddq_s32(intermediateResults0_high_s_32x4, biasChannel0_1024_s_32x4);
5962 intermediateResults1_low_s_32x4 = vaddq_s32(intermediateResults1_low_s_32x4, biasChannel1_1024_s_32x4);
5963 intermediateResults1_high_s_32x4 = vaddq_s32(intermediateResults1_high_s_32x4, biasChannel1_1024_s_32x4);
5965 intermediateResults2_low_s_32x4 = vaddq_s32(intermediateResults2_low_s_32x4, biasChannel2_1024_s_32x4);
5966 intermediateResults2_high_s_32x4 = vaddq_s32(intermediateResults2_high_s_32x4, biasChannel2_1024_s_32x4);
5969 uint8x8x3_t results_u_8x8x3;
// Two-step narrowing per target channel: rounding right shift by 10 (division by 1024) with saturation
// from signed 32 bit to unsigned 16 bit, join low/high halves, then saturating narrowing to unsigned 8 bit.
5972 results_u_8x8x3.val[0] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_high_s_32x4, 10)));
5973 results_u_8x8x3.val[1] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_high_s_32x4, 10)));
5974 results_u_8x8x3.val[2] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_high_s_32x4, 10)));
// Interleaving store of the three target channels (24 bytes).
5977 vst3_u8(target, results_u_8x8x3);
// NEON kernel: converts 16 interleaved 3-channel 8-bit pixels to 16 interleaved 3-channel 8-bit pixels
// with a 3x3 fixed-point factor matrix and a per-target-channel bias, using 32-bit intermediate sums.
// For each target channel t the code below computes
//   target_t = saturate_u8(round((f_t0 * s0 + f_t1 * s1 + f_t2 * s2 + bias_t) / 1024))
// where factorChannelTS_1024_s_16x4 is the factor for target channel T and source channel S.
// Because the bias is added before the right shift by 10, it is expressed in the same x1024 scale as the products.
// The 16 pixels are processed as four groups of 4 (A: pixels 0-3, B: 4-7, C: 8-11, D: 12-15)
// because each 32-bit vector holds 4 lanes.
// source: 48 readable bytes (16 pixels); target: 48 writable bytes (16 pixels).
5980 OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4)
5982 ocean_assert(source !=
nullptr && target !=
nullptr);
// De-interleaving load: val[0..2] each hold one source channel of the 16 pixels.
6003 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
// Pixels 0-7: zero-extend each channel from 8 to 16 bit (values stay in [0, 255]).
6005 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6006 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6007 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
// Pixels 8-15: zero-extend each channel from 8 to 16 bit.
6009 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6010 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6011 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
// Source channel 0, split into the four groups A-D.
6013 const int16x4_t source0_A_s_16x4 = vget_low_s16(source0_low_s_16x8);
6014 const int16x4_t source0_B_s_16x4 = vget_high_s16(source0_low_s_16x8);
6015 const int16x4_t source0_C_s_16x4 = vget_low_s16(source0_high_s_16x8);
6016 const int16x4_t source0_D_s_16x4 = vget_high_s16(source0_high_s_16x8);
// Widening multiply (s16 * s16 -> s32) initializes the accumulators of target channel 0
// with the contribution of source channel 0.
6018 int32x4_t intermediateResults0_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel00_1024_s_16x4);
6019 int32x4_t intermediateResults0_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel00_1024_s_16x4);
6020 int32x4_t intermediateResults0_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel00_1024_s_16x4);
6021 int32x4_t intermediateResults0_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel00_1024_s_16x4);
// Same for target channel 1.
6023 int32x4_t intermediateResults1_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel10_1024_s_16x4);
6024 int32x4_t intermediateResults1_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel10_1024_s_16x4);
6025 int32x4_t intermediateResults1_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel10_1024_s_16x4);
6026 int32x4_t intermediateResults1_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel10_1024_s_16x4);
// Same for target channel 2.
6028 int32x4_t intermediateResults2_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel20_1024_s_16x4);
6029 int32x4_t intermediateResults2_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel20_1024_s_16x4);
6030 int32x4_t intermediateResults2_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel20_1024_s_16x4);
6031 int32x4_t intermediateResults2_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel20_1024_s_16x4);
// Source channel 1, split into the four groups A-D.
6034 const int16x4_t source1_A_s_16x4 = vget_low_s16(source1_low_s_16x8);
6035 const int16x4_t source1_B_s_16x4 = vget_high_s16(source1_low_s_16x8);
6036 const int16x4_t source1_C_s_16x4 = vget_low_s16(source1_high_s_16x8);
6037 const int16x4_t source1_D_s_16x4 = vget_high_s16(source1_high_s_16x8);
// Widening multiply-accumulate of the source channel 1 contribution to target channel 0.
6039 intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source1_A_s_16x4, factorChannel01_1024_s_16x4);
6040 intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source1_B_s_16x4, factorChannel01_1024_s_16x4);
6041 intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source1_C_s_16x4, factorChannel01_1024_s_16x4);
6042 intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source1_D_s_16x4, factorChannel01_1024_s_16x4);
// Source channel 1 contribution to target channel 1.
6044 intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source1_A_s_16x4, factorChannel11_1024_s_16x4);
6045 intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source1_B_s_16x4, factorChannel11_1024_s_16x4);
6046 intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source1_C_s_16x4, factorChannel11_1024_s_16x4);
6047 intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source1_D_s_16x4, factorChannel11_1024_s_16x4);
// Source channel 1 contribution to target channel 2.
6049 intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source1_A_s_16x4, factorChannel21_1024_s_16x4);
6050 intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source1_B_s_16x4, factorChannel21_1024_s_16x4);
6051 intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source1_C_s_16x4, factorChannel21_1024_s_16x4);
6052 intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source1_D_s_16x4, factorChannel21_1024_s_16x4);
// Source channel 2, split into the four groups A-D.
6055 const int16x4_t source2_A_s_16x4 = vget_low_s16(source2_low_s_16x8);
6056 const int16x4_t source2_B_s_16x4 = vget_high_s16(source2_low_s_16x8);
6057 const int16x4_t source2_C_s_16x4 = vget_low_s16(source2_high_s_16x8);
6058 const int16x4_t source2_D_s_16x4 = vget_high_s16(source2_high_s_16x8);
// Widening multiply-accumulate of the source channel 2 contribution to target channel 0.
6060 intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source2_A_s_16x4, factorChannel02_1024_s_16x4);
6061 intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source2_B_s_16x4, factorChannel02_1024_s_16x4);
6062 intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source2_C_s_16x4, factorChannel02_1024_s_16x4);
6063 intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source2_D_s_16x4, factorChannel02_1024_s_16x4);
// Source channel 2 contribution to target channel 1.
6065 intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source2_A_s_16x4, factorChannel12_1024_s_16x4);
6066 intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source2_B_s_16x4, factorChannel12_1024_s_16x4);
6067 intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source2_C_s_16x4, factorChannel12_1024_s_16x4);
6068 intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source2_D_s_16x4, factorChannel12_1024_s_16x4);
// Source channel 2 contribution to target channel 2.
6070 intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source2_A_s_16x4, factorChannel22_1024_s_16x4);
6071 intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source2_B_s_16x4, factorChannel22_1024_s_16x4);
6072 intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source2_C_s_16x4, factorChannel22_1024_s_16x4);
6073 intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source2_D_s_16x4, factorChannel22_1024_s_16x4);
// Add the bias of target channel 0 (plain 32-bit add) to all four groups.
6078 intermediateResults0_A_s_32x4 = vaddq_s32(intermediateResults0_A_s_32x4, biasChannel0_1024_s_32x4);
6079 intermediateResults0_B_s_32x4 = vaddq_s32(intermediateResults0_B_s_32x4, biasChannel0_1024_s_32x4);
6080 intermediateResults0_C_s_32x4 = vaddq_s32(intermediateResults0_C_s_32x4, biasChannel0_1024_s_32x4);
6081 intermediateResults0_D_s_32x4 = vaddq_s32(intermediateResults0_D_s_32x4, biasChannel0_1024_s_32x4);
// Add the bias of target channel 1.
6083 intermediateResults1_A_s_32x4 = vaddq_s32(intermediateResults1_A_s_32x4, biasChannel1_1024_s_32x4);
6084 intermediateResults1_B_s_32x4 = vaddq_s32(intermediateResults1_B_s_32x4, biasChannel1_1024_s_32x4);
6085 intermediateResults1_C_s_32x4 = vaddq_s32(intermediateResults1_C_s_32x4, biasChannel1_1024_s_32x4);
6086 intermediateResults1_D_s_32x4 = vaddq_s32(intermediateResults1_D_s_32x4, biasChannel1_1024_s_32x4);
// Add the bias of target channel 2.
6088 intermediateResults2_A_s_32x4 = vaddq_s32(intermediateResults2_A_s_32x4, biasChannel2_1024_s_32x4);
6089 intermediateResults2_B_s_32x4 = vaddq_s32(intermediateResults2_B_s_32x4, biasChannel2_1024_s_32x4);
6090 intermediateResults2_C_s_32x4 = vaddq_s32(intermediateResults2_C_s_32x4, biasChannel2_1024_s_32x4);
6091 intermediateResults2_D_s_32x4 = vaddq_s32(intermediateResults2_D_s_32x4, biasChannel2_1024_s_32x4);
6094 uint8x16x3_t results_u_8x16x3;
// Per target channel: rounding right shift by 10 (division by 1024) with saturation from signed 32 bit to
// unsigned 16 bit, pairwise join (A+B, C+D), saturating narrowing to unsigned 8 bit, then join to 16 lanes.
6097 results_u_8x16x3.val[0] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_D_s_32x4, 10))));
6099 results_u_8x16x3.val[1] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_D_s_32x4, 10))));
6100 results_u_8x16x3.val[2] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_D_s_32x4, 10))));
// Interleaving store of the three target channels (48 bytes).
6103 vst3q_u8(target, results_u_8x16x3);
// NEON kernel: converts 16 interleaved 3-channel 8-bit pixels to 16 interleaved 3-channel 8-bit pixels
// with a 3x3 fixed-point factor matrix and a per-target-channel bias that is added before the final shift.
// For each target channel t the code below computes
//   target_t = saturate_u8(round((f_t0 * s0 + f_t1 * s1 + f_t2 * s2 + bias_t) / 128))
// where factorChannelTS_128_s_16x8 is the factor for target channel T and source channel S.
// Because the bias is added before the right shift by 7, it is expressed in the same x128 scale as the products.
// The 16 pixels are processed as two halves of 8 pixels ("low" and "high") sharing the same factors and biases.
// source: 48 readable bytes (16 pixels); target: 48 writable bytes (16 pixels).
6106 OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8)
6108 ocean_assert(source !=
nullptr && target !=
nullptr);
// De-interleaving load: val[0..2] each hold one source channel of the 16 pixels.
6128 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
// Pixels 0-7: zero-extend each channel from 8 to 16 bit (values stay in [0, 255]).
6130 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6131 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6132 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
// Pixels 8-15: zero-extend each channel from 8 to 16 bit.
6134 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6135 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6136 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
// Contribution of source channel 0 (low half); vmulq_s16 keeps only the low 16 bits of each product.
6139 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_128_s_16x8);
6140 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_128_s_16x8);
6141 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_128_s_16x8);
// Contribution of source channel 0 (high half).
6143 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_128_s_16x8);
6144 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_128_s_16x8);
6145 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_128_s_16x8);
// Contribution of source channel 1 (low half); vmlaq_s16 is a non-saturating (wrapping) multiply-accumulate,
// so the weighted sum itself is expected to stay within the signed 16-bit range.
6148 intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source1_low_s_16x8, factorChannel01_128_s_16x8);
6149 intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source1_low_s_16x8, factorChannel11_128_s_16x8);
6150 intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source1_low_s_16x8, factorChannel21_128_s_16x8);
// Contribution of source channel 1 (high half).
6152 intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source1_high_s_16x8, factorChannel01_128_s_16x8);
6153 intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source1_high_s_16x8, factorChannel11_128_s_16x8);
6154 intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source1_high_s_16x8, factorChannel21_128_s_16x8);
// Contribution of source channel 2 (low half, wrapping multiply-accumulate).
6157 intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source2_low_s_16x8, factorChannel02_128_s_16x8);
6158 intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source2_low_s_16x8, factorChannel12_128_s_16x8);
6159 intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source2_low_s_16x8, factorChannel22_128_s_16x8);
// Contribution of source channel 2 (high half).
6161 intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source2_high_s_16x8, factorChannel02_128_s_16x8);
6162 intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source2_high_s_16x8, factorChannel12_128_s_16x8);
6163 intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source2_high_s_16x8, factorChannel22_128_s_16x8);
// Add the per-target-channel bias to both halves with a saturating add
// (clamps at the signed 16-bit limits instead of wrapping).
6167 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, biasChannel0_128_s_16x8);
6168 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, biasChannel0_128_s_16x8);
6170 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, biasChannel1_128_s_16x8);
6171 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, biasChannel1_128_s_16x8);
6173 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, biasChannel2_128_s_16x8);
6174 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, biasChannel2_128_s_16x8);
6177 uint8x16x3_t results_u_8x16x3;
// Rounding right shift by 7 (division by 128) with saturating narrowing to unsigned 8 bit,
// then the low and high halves are joined back into one 16-lane vector per target channel.
6180 results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 7));
6181 results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 7));
6182 results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 7));
// Interleaving store of the three target channels (48 bytes).
6185 vst3q_u8(target, results_u_8x16x3);
// NEON kernel: converts 16 interleaved 3-channel 8-bit pixels to 16 interleaved 4-channel 8-bit pixels.
// The first three target channels come from a 3x3 fixed-point factor matrix with a per-source-channel bias:
//   target_t = saturate_u8(round((f_t0 * (s0 - b0) + f_t1 * (s1 - b1) + f_t2 * (s2 - b2)) / 64)), t = 0..2
// where factorChannelTS_64_s_16x8 is the factor for target channel T and source channel S
// (the _64 suffix matches the final right shift by 6).
// The fourth target channel is copied unchanged from channelValue3_u_8x16.
// The 16 pixels are processed as two halves of 8 pixels ("low" and "high").
// source: 48 readable bytes (16 pixels); target: 64 writable bytes (16 pixels).
6188 OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8,
const uint8x16_t& channelValue3_u_8x16)
6190 ocean_assert(source !=
nullptr && target !=
nullptr);
// De-interleaving load: val[0..2] each hold one source channel of the 16 pixels.
6205 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
// Pixels 0-7: widening subtraction of the bias; the wrapped u16 difference reinterpreted as s16
// is the signed difference (range [-255, 255]).
6208 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6209 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6210 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
// Pixels 8-15: same bias subtraction for the upper half.
6212 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6213 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6214 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
// Contribution of source channel 0 (low half); vmulq_s16 keeps only the low 16 bits of each product.
6218 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
6219 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
6220 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
// Contribution of source channel 0 (high half).
6222 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
6223 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
6224 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
// Contribution of source channel 1 (low half), accumulated with a saturating add.
6226 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8));
6227 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
6228 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
// Contribution of source channel 1 (high half).
6230 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
6231 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
6232 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
// Contribution of source channel 2 (low half), accumulated with a saturating add.
6234 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
6235 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
6236 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
// Contribution of source channel 2 (high half).
6238 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
6239 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
6240 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
6242 uint8x16x4_t results_u_8x16x4;
// Rounding right shift by 6 (division by 64) with saturating narrowing to unsigned 8 bit,
// then the low and high halves are joined back into one 16-lane vector per target channel.
6245 results_u_8x16x4.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
6246 results_u_8x16x4.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
6247 results_u_8x16x4.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
// The fourth target channel is taken as-is from the caller-provided vector.
6248 results_u_8x16x4.val[3] = channelValue3_u_8x16;
// Interleaving store of the four target channels (64 bytes).
6251 vst4q_u8(target, results_u_8x16x4);
// NEON kernel: weighted sum of four interleaved 8-bit channels into a single 8-bit channel, 8 pixels per call.
// Each template flag selects at compile time whether the matching source channel contributes to the sum.
// NOTE(review): the declarator (function name and parameter list) is not visible between the template header
// and the body in this excerpt; `source`, `target` and the factorChannel*_128_u_8x8 operands used below are
// presumably function parameters - confirm against the declaration.
6254 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
// At least one channel must contribute, otherwise the result would be constant zero.
6257 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3,
"Invalid multiplication factors!");
6259 ocean_assert(source !=
nullptr && target !=
nullptr);
// De-interleaving load of 32 bytes: val[0..3] each hold one channel of the 8 pixels.
6279 uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
// 16-bit accumulator, one lane per pixel.
6281 uint16x8_t intermediateResults_16x8;
6285 if constexpr (tUseFactorChannel0)
// Widening multiply (u8 * u8 -> u16) initializes the accumulator with the channel 0 contribution.
6287 intermediateResults_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel0_128_u_8x8);
// NOTE(review): presumably the `else` branch of the test above (keyword not visible in this excerpt):
// the accumulator starts at zero when channel 0 does not contribute.
6291 intermediateResults_16x8 = vdupq_n_u16(0u);
6296 if constexpr (tUseFactorChannel1)
// Widening multiply-accumulate of the channel 1 contribution.
6298 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[1], factorChannel1_128_u_8x8);
6303 if constexpr (tUseFactorChannel2)
// Widening multiply-accumulate of the channel 2 contribution.
6305 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[2], factorChannel2_128_u_8x8);
6310 if constexpr (tUseFactorChannel3)
// Widening multiply-accumulate of the channel 3 contribution.
6312 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[3], factorChannel3_128_u_8x8);
// Rounding right shift by 7 (i.e., (sum + 64) / 128) with saturating narrowing to unsigned 8 bit.
6316 uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_16x8, 7);
// Writes the 8 resulting bytes.
6319 vst1_u8(target, results_u_8x8);
/**
 * Converts 8 interleaved pixels with 4 channels (8 bit each) to 8 interleaved pixels with 2 channels
 * by computing, for each target channel, a weighted sum of the four source channels.
 * Factor naming as used below: factorChannel<T><S>_128_u_8x8 is the weight applied to source channel S
 * when accumulating target channel T.
 * NOTE(review): brace lines are absent from this listing and the embedded numbering jumps from 6324 to 6346;
 * the dropped lines are presumably comments -- confirm against the original header.
 * @param source The source memory, 8 * 4 = 32 bytes are read, must be valid
 * @param target The target memory, 8 * 2 = 16 bytes are written, must be valid
 * @param factorChannel00_128_u_8x8 Weight of source channel 0 for target channel 0
 * @param factorChannel10_128_u_8x8 Weight of source channel 0 for target channel 1
 * @param factorChannel01_128_u_8x8 Weight of source channel 1 for target channel 0
 * @param factorChannel11_128_u_8x8 Weight of source channel 1 for target channel 1
 * @param factorChannel02_128_u_8x8 Weight of source channel 2 for target channel 0
 * @param factorChannel12_128_u_8x8 Weight of source channel 2 for target channel 1
 * @param factorChannel03_128_u_8x8 Weight of source channel 3 for target channel 0
 * @param factorChannel13_128_u_8x8 Weight of source channel 3 for target channel 1
 */
6322 OCEAN_FORCE_INLINE
void FrameChannels::convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const uint8x8_t& factorChannel00_128_u_8x8,
const uint8x8_t& factorChannel10_128_u_8x8,
const uint8x8_t& factorChannel01_128_u_8x8,
const uint8x8_t& factorChannel11_128_u_8x8,
const uint8x8_t& factorChannel02_128_u_8x8,
const uint8x8_t& factorChannel12_128_u_8x8,
const uint8x8_t& factorChannel03_128_u_8x8,
const uint8x8_t& factorChannel13_128_u_8x8)
// Debug-only sanity check of both pointers.
6324 ocean_assert(source !=
nullptr && target !=
nullptr);
// vld4_u8 reads 32 bytes and de-interleaves them: val[c] holds channel c of the 8 pixels.
6346 uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
// Source channel 0 initializes both 16-bit accumulators via a widening multiply (8 bit x 8 bit -> 16 bit).
6348 uint16x8_t intermediateResultsChannel0_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel00_128_u_8x8);
6349 uint16x8_t intermediateResultsChannel1_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel10_128_u_8x8);
// Source channels 1..3 are added with widening multiply-accumulate.
// NOTE(review): the uint16 accumulators cannot wrap as long as the four factors of a target channel
// sum to at most 257 (255 * 257 = 65535); the `_128` suffix suggests a sum of 128 -- not enforced here, confirm with the caller.
6351 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[1], factorChannel01_128_u_8x8);
6352 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[1], factorChannel11_128_u_8x8);
6354 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[2], factorChannel02_128_u_8x8);
6355 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[2], factorChannel12_128_u_8x8);
6357 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[3], factorChannel03_128_u_8x8);
6358 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[3], factorChannel13_128_u_8x8);
// Container for the two de-interleaved target channels.
6360 uint8x8x2_t results_u_8x8x2;
// Rounding right shift by 7 bits (i.e., division by 128) with unsigned saturation, narrowing to 8 bit.
6364 results_u_8x8x2.val[0] = vqrshrn_n_u16(intermediateResultsChannel0_16x8, 7);
6365 results_u_8x8x2.val[1] = vqrshrn_n_u16(intermediateResultsChannel1_16x8, 7);
// vst2_u8 interleaves the two channel vectors again and writes 16 bytes to the target.
6368 vst2_u8(target, results_u_8x8x2);
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition: FrameChannels.h:51
static bool premultipliedAlphaToStraightAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool zipChannels(const Frames &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool separateTo1Channel(const Frame &sourceFrame, Frames &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool premultipliedAlphaToStraightAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool separateTo1Channel(const Frame &sourceFrame, const std::initializer_list< Frame * > &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool zipChannels(const std::initializer_list< Frame > &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool straightAlphaToPremultipliedAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
static bool straightAlphaToPremultipliedAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
This class implements frame channel conversion, transformation and extraction functions.
Definition: FrameChannels.h:31
static void reverseChannelOrder(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Reverses the order of the channels of a frame with zipped pixel format.
Definition: FrameChannels.h:2840
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_1024_s_16x8, const __m128i &factorChannel10_1024_s_16x8, const __m128i &factorChannel20_1024_s_16x8, const __m128i &factorChannel01_1024_s_16x8, const __m128i &factorChannel11_1024_s_16x8, const __m128i &factorChannel21_1024_s_16x8, const __m128i &factorChannel02_1024_s_16x8, const __m128i &factorChannel12_1024_s_16x8, const __m128i &factorChannel22_1024_s_16x8, const __m128i &biasChannel0_1024_s_32x4, const __m128i &biasChannel1_1024_s_32x4, const __m128i &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition: FrameChannels.h:5340
static void addChannelValueRow(const T *source, T *target, const size_t size, const void *channelValueParameter)
Adds a channel to a given row with generic (zipped) pixel format and sets all values to a specified v...
Definition: FrameChannels.h:4288
static void shuffleRowChannelsAndSetLastChannelValue(const T *source, T *target, const size_t size, const void *options=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern and sets the last c...
Definition: FrameChannels.h:3747
static void separateTo1Channel(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition: FrameChannels.h:1847
static OCEAN_FORCE_INLINE void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8, const uint8x16_t &channelValue3_u_8x16)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 4 channels per pixel by a linear combi...
Definition: FrameChannels.h:6188
static void addChannelRow(const void **sources, void **targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void *options)
Adds a channel to a given row with generic (zipped) pixel format and copies the information of the ne...
Definition: FrameChannels.h:4188
static void shuffleChannelsAndSetLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of source frame and sets the last channel with constant value in the target fra...
Definition: FrameChannels.h:3910
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0_128_u_16x8, const __m128i &multiplicationFactors1_128_u_16x8, const __m128i &multiplicationFactors2_128_u_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition: FrameChannels.h:5186
static void shuffleChannels(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of a frame by an arbitrary pattern.
Definition: FrameChannels.h:3882
static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 3 channels to pixels with one channel by a linear combination of the fo...
Definition: FrameChannels.h:4876
static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with one channel by a linear combination of the fo...
Definition: FrameChannels.h:4949
static void copyChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Copies one channel from a given frame with zipped pixel format to another frame with zipped pixel for...
Definition: FrameChannels.h:2799
static void zipChannels(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition: FrameChannels.h:2598
static void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition: FrameChannels.h:4091
static void applyRowOperator(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > &rowOperatorFunction, Worker *worker=nullptr)
Applies a row operator to all rows of a source image.
Definition: FrameChannels.h:4006
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition: FrameChannels.h:5767
static void convertRow4ChannelsTo2Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *multiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with two channels by a linear combination of the fo...
static void setChannelSubset(T *frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Sets one channel of a frame with one unique value.
Definition: FrameChannels.h:4487
static void applyBivariateOperatorSubset(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Generic bivariate pixel operations.
Definition: FrameChannels.h:4720
static void applyAdvancedPixelModifier(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition: FrameChannels.h:3968
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition: FrameChannels.h:5713
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear com...
Definition: FrameChannels.h:5980
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8, const uint8x8_t &factorChannel3_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static void addFirstChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition: FrameChannels.h:2711
static void addLastChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the ba...
Definition: FrameChannels.h:2731
static void removeFirstChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the first channel from a given frame with zipped (generic) pixel format.
Definition: FrameChannels.h:2767
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition: FrameChannels.h:5889
static void addLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition: FrameChannels.h:2747
static void convertRow3ChannelsTo3Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void reverseRowPixelOrderInPlace(T *data, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general) in place.
Definition: FrameChannels.h:3017
static void applyRowOperatorSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows)
Applies a row operator to a subset of all rows of a source image.
Definition: FrameChannels.h:4853
static void applyPixelModifier(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition: FrameChannels.h:3954
static void narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const size_t size, const void *unusedParameters=nullptr)
Narrows a row of pixels with 16 bit channels to pixels with 8 bit channels.
Definition: FrameChannels.h:4129
static void applyAdvancedPixelModifierSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition: FrameChannels.h:4614
static void convertRow4ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 4 channels to pixels with 3 channels by a linear combination of the fo...
static void shuffleRowChannels(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern.
Definition: FrameChannels.h:3387
static void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition: FrameChannels.h:4053
static void convertRow3ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 3 channels per pixel by a linear combi...
Definition: FrameChannels.h:6106
static void convertRow3ChannelsTo4Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 4 channels by a linear combination of the thr...
static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
Definition of a constant to specify that the number of channels is not known at compile time but at ...
Definition: FrameChannels.h:37
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition: FrameChannels.h:5832
static void copyChannelRow(const T *source, T *target, const size_t size, const void *unusedParameters=nullptr)
Copies one channel from a source row to a target row with generic (zipped) pixel format.
Definition: FrameChannels.h:4327
static void reverseRowPixelOrder(const T *source, T *target, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general).
Definition: FrameChannels.h:2856
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0123_128_s_32x)
Converts 16 pixels with 4 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition: FrameChannels.h:5477
static void removeLastChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the last channel from a given frame with zipped (generic) pixel format.
Definition: FrameChannels.h:2783
static void transformGeneric(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker)
Transforms a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24,...
Definition: FrameChannels.h:4028
static void setChannel(T *frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker *worker=nullptr)
Sets one channel of a frame with a specific unique value.
Definition: FrameChannels.h:2821
static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition: FrameChannels.h:5113
static void narrow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Narrows 16 bit channels of a frame to 8 bit channels.
Definition: FrameChannels.h:3938
static void transformGenericSubset(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const RowReversePixelOrderFunction< void > rowReversePixelOrderFunction, const unsigned int bytesPerRow, const unsigned int sourceStrideBytes, const unsigned int targetStrideBytes, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a frame with generic pixel format (with zipped pixel information) like RGB24 o...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_128_s_16x8, const __m128i &factorChannel10_128_s_16x8, const __m128i &factorChannel20_128_s_16x8, const __m128i &factorChannel01_128_s_16x8, const __m128i &factorChannel11_128_s_16x8, const __m128i &factorChannel21_128_s_16x8, const __m128i &factorChannel02_128_s_16x8, const __m128i &factorChannel12_128_s_16x8, const __m128i &factorChannel22_128_s_16x8, const __m128i &biasChannel0_s_16x8, const __m128i &biasChannel1_s_16x8, const __m128i &biasChannel2_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition: FrameChannels.h:5252
static void reverseRowChannelOrder(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Reverses/mirrors the order of channels in a given row (or a memory block in general).
Definition: FrameChannels.h:3195
static void convertRow3ChannelsTo3Channels8BitPerChannel10BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void applyBivariateOperator(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Generic bivariate pixel operations. Applies bivariate per-pixel operators: C(y, x) = op(A(y,...
Definition: FrameChannels.h:3987
static void addFirstChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the fr...
Definition: FrameChannels.h:2695
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel00_128_u_8x8, const uint8x8_t &factorChannel10_128_u_8x8, const uint8x8_t &factorChannel01_128_u_8x8, const uint8x8_t &factorChannel11_128_u_8x8, const uint8x8_t &factorChannel02_128_u_8x8, const uint8x8_t &factorChannel12_128_u_8x8, const uint8x8_t &factorChannel03_128_u_8x8, const uint8x8_t &factorChannel13_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with two channels per pixel by a linear combi...
Definition: FrameChannels.h:6322
static void separateTo1ChannelRuntime(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition: FrameChannels.h:4348
static void zipChannelsRuntime(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition: FrameChannels.h:4421
void(*)(const TSource *sourceRow, TTarget *targetRow, const unsigned int width, const unsigned int height, unsigned int rowIndex, const unsigned int sourceStrideElements, const unsigned int targetStrideElements) RowOperatorFunction
Definition of a function pointer to a function able to operate on an entire image row.
Definition: FrameChannels.h:43
static void applyPixelModifierSubset(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition: FrameChannels.h:4510
static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition: FrameChannels.h:5024
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i &multiplicationFactorsChannel1_0123_128_s_16x8)
Converts 16 pixels with 4 channels per pixel to 16 pixels with two channels per pixel by a linear comb...
Definition: FrameChannels.h:5540
This is the base class for all frame converter classes.
Definition: FrameConverter.h:32
ConversionFlag
Definition of individual conversion flags.
Definition: FrameConverter.h:39
@ CONVERT_NORMAL
Normal conversion, neither flips nor mirrors the image.
Definition: FrameConverter.h:49
@ CONVERT_FLIPPED_AND_MIRRORED
Rotated conversion, rotates the image by 180.0 degrees with anchor in the center of the image.
Definition: FrameConverter.h:82
@ CONVERT_MIRRORED
Mirrored conversion, exchanges left and right of the image (like in a mirror, mirroring around the y-axis).
Definition: FrameConverter.h:71
@ CONVERT_FLIPPED
Flipped conversion, exchanges top and bottom of the image (flipping around the x-axis).
Definition: FrameConverter.h:60
static void convertGenericPixelFormat(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const ConversionFlag flag, const RowConversionFunction< TSource, TTarget > rowConversionFunction, const RowReversePixelOrderInPlaceFunction< TTarget > targetReversePixelOrderInPlaceFunction, const bool areContinuous, const void *options, Worker *worker)
Converts a frame with generic pixel format (e.g., RGBA32, BGR24, YUV24, ...) to a frame with generic ...
Definition: FrameConverter.h:3211
void(*)(T *row, const size_t width) RowReversePixelOrderInPlaceFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition: FrameConverter.h:589
void(*)(const T *inputRow, T *targetRow, const size_t width) RowReversePixelOrderFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition: FrameConverter.h:580
static void convertArbitraryPixelFormat(const void **sources, void **targets, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int multipleRowsPerIteration, const MultipleRowsConversionFunction multipleRowsConversionFunction, const void *options, Worker *worker)
Converts a frame with arbitrary pixel format (e.g., Y_UV12, Y_VU12, YUYV16, ...) to a frame with arbi...
Definition: FrameConverter.h:3234
static OCEAN_FORCE_INLINE uint8x16_t cast16ElementsNEON(const float32x4_t &sourceA_f_32x4, const float32x4_t &sourceB_f_32x4, const float32x4_t &sourceC_f_32x4, const float32x4_t &sourceD_f_32x4)
Casts 16 float elements to 16 uint8_t elements.
Definition: NEON.h:1208
static __m128i divideByRightShiftSigned32Bit(const __m128i &value, const unsigned int rightShifts)
Divides four signed 32 bit values by applying a right shift.
Definition: SSE.h:3108
static __m128i load128i(const void *const buffer)
Loads a 128i value from the memory.
Definition: SSE.h:3619
static void store128i(const __m128i &value, uint8_t *const buffer)
Stores a 128i value to the memory.
Definition: SSE.h:3764
static __m128i divideByRightShiftSigned16Bit(const __m128i &value, const unsigned int rightShifts)
Divides eight signed 16 bit values by applying a right shift.
Definition: SSE.h:3066
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8AndAccumulate(const __m128i &values0, const __m128i &values1, __m128i &results0, __m128i &results1)
Multiplies 8 int16_t values with 8 int16_t values and adds the products to 8 int32_t values.
Definition: SSE.h:3909
static OCEAN_FORCE_INLINE void interleave3Channel8Bit48Elements(const __m128i &channel0, const __m128i &channel1, const __m128i &channel2, __m128i &interleavedA, __m128i &interleavedB, __m128i &interleavedC)
Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition: SSE.h:3345
static OCEAN_FORCE_INLINE void reverseChannelOrder3Channel8Bit48Elements(const __m128i &interleaved0, const __m128i &interleaved1, const __m128i &interleaved2, __m128i &reversedInterleaved0, __m128i &reversedInterleaved1, __m128i &reversedInterleaved2)
Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channe...
Definition: SSE.h:3387
static __m128i removeHighBits16_8(const __m128i &value)
Removes the higher 8 bits of eight 16 bit elements.
Definition: SSE.h:3799
static OCEAN_FORCE_INLINE void deInterleave3Channel8Bit48Elements(const __m128i &interleavedA, const __m128i &interleavedB, const __m128i &interleavedC, __m128i &channel0, __m128i &channel1, __m128i &channel2)
Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition: SSE.h:3304
static __m128i set128i(const unsigned long long high64, const unsigned long long low64)
Sets a 128i value by two 64 bit values.
Definition: SSE.h:3770
static OCEAN_FORCE_INLINE void reverseChannelOrder4Channel8Bit64Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (64 elements) of an image with 4 interleaved channels...
Definition: SSE.h:3412
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8(const __m128i &values0, const __m128i &values1, __m128i &products0, __m128i &products1)
Multiplies 8 int16_t values with 8 int16_t values and returns the products as 8 int32_t results.
Definition: SSE.h:3900
static OCEAN_FORCE_INLINE void reverseChannelOrder2Channel8Bit32Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (32 elements) of an image with 2 interleaved channels...
Definition: SSE.h:3372
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition: Caller.h:2876
This class implements Ocean's image class.
Definition: Frame.h:1792
PixelFormat
Definition of all pixel formats available in the Ocean framework.
Definition: Frame.h:183
@ FORMAT_UNDEFINED
Undefined pixel format.
Definition: Frame.h:187
TypeMapperBySize< sizeof(T)>::Type Type
Definition of an invalid mapped data type.
Definition: DataType.h:501
This class implements a worker able to distribute function calls over different threads.
Definition: Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
std::vector< Frame > Frames
Definition of a vector holding frames.
Definition: Frame.h:1755
std::vector< Index32 > Indices32
Definition of a vector holding 32 bit index values.
Definition: Base.h:96
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15
Default definition of a type with tBytes bytes.
Definition: DataType.h:32