8#ifndef META_OCEAN_CV_FRAME_CHANNELS_H
9#define META_OCEAN_CV_FRAME_CHANNELS_H
/// Sentinel value (0) for a channel-count template parameter; as the name says, it marks a channel count that is not known at compile time.
/// It is the default value of `tChannels` in the separateTo1Channel() and zipChannels() templates of this file.
37 static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME = 0u;
/**
 * Alias for a pointer to a function returning void that receives a read-only source row, a writable target row,
 * the frame dimensions, a row index, and the source/target strides in elements.
 * The two channel-count template parameters do not appear in the function-pointer type itself.
 * This alias is the type of the `rowOperatorFunction` parameter of applyRowOperator() and applyRowOperatorSubset().
 * NOTE(review): whether `sourceRow`/`targetRow` point to row `rowIndex` or to the first row is not visible here — confirm against a caller.
 * @tparam TSource Element type of the source row
 * @tparam TTarget Element type of the target row
 * @tparam tSourceChannels Number of source channels (unused in the aliased type)
 * @tparam tTargetChannels Number of target channels (unused in the aliased type)
 */
42 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
43 using RowOperatorFunction = void(*)(
const TSource* sourceRow, TTarget* targetRow,
const unsigned int width,
const unsigned int height,
unsigned int rowIndex,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements);
/**
 * Declaration of the generic channel-separation function.
 * The uint8_t specializations defined further down in this file split an interleaved source frame into one
 * single-channel target plane per channel; the generic definition is presumably equivalent — confirm.
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target planes
 * @tparam tChannels Channel count if known at compile time; defaults to CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
 * @param sourceFrame Interleaved source frame (read-only)
 * @param targetFrames Array of target plane pointers, one per channel
 * @param width Frame width (the specializations treat it as a pixel count per row)
 * @param height Frame height in rows
 * @param channels Runtime channel count; the specializations assert that it equals their fixed channel count
 * @param sourceFramePaddingElements Elements skipped at the end of each source row
 * @param targetFramesPaddingElements Per-target padding values; the specializations treat nullptr as "no padding"
 */
207 template <
typename TSource,
typename TTarget,
unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
208 static void separateTo1Channel(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements);
/**
 * Overload of separateTo1Channel() that takes the target planes and their paddings as initializer lists.
 * There is no explicit `channels` parameter; presumably the channel count is taken from the list size — confirm against the definition.
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target planes
 * @param sourceFrame Source frame (read-only)
 * @param targetFrames List of target plane pointers
 * @param width Frame width
 * @param height Frame height
 * @param sourceFramePaddingElements Padding value for the source frame
 * @param targetFramesPaddingElements List of padding values for the target planes
 */
236 template <
typename TSource,
typename TTarget>
237 static void separateTo1Channel(
const TSource*
const sourceFrame,
const std::initializer_list<TTarget*>& targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int sourceFramePaddingElements,
const std::initializer_list<const unsigned int>& targetFramesPaddingElements);
/**
 * Declaration with the mirrored signature of separateTo1Channel(): several source planes and one target frame.
 * Presumably interleaves ("zips") the source planes into the target frame — the definition is not part of this listing.
 * @tparam TSource Element type of the source planes
 * @tparam TTarget Element type of the target frame
 * @tparam tChannels Channel count if known at compile time; defaults to CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
 * @param sourceFrames Array of read-only source plane pointers
 * @param targetFrame Target frame
 * @param width Frame width
 * @param height Frame height
 * @param channels Runtime channel count
 * @param sourceFramesPaddingElements Array of padding values for the source planes (nullptr handling not visible here)
 * @param targetFramePaddingElements Padding value for the target frame
 */
265 template <
typename TSource,
typename TTarget,
unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
266 static void zipChannels(
const TSource*
const*
const sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
/**
 * Overload of zipChannels() that takes the source planes and their paddings as initializer lists.
 * There is no explicit `channels` parameter; presumably the channel count is taken from the list size — confirm against the definition.
 * @tparam TSource Element type of the source planes
 * @tparam TTarget Element type of the target frame
 * @param sourceFrames List of read-only source plane pointers
 * @param targetFrame Target frame
 * @param width Frame width
 * @param height Frame height
 * @param sourceFramesPaddingElements List of padding values for the source planes
 * @param targetFramePaddingElements Padding value for the target frame
 */
294 template <
typename TSource,
typename TTarget>
295 static void zipChannels(
const std::initializer_list<const TSource*>& sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const std::initializer_list<unsigned int>& sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
/**
 * Declaration only. Judging by the name, creates a target frame with one more channel than the source by placing
 * the data of `sourceNewChannel` in front of the existing channels — confirm against the definition.
 * @tparam T Element type shared by all frames
 * @tparam tSourceChannels Number of channels of the source frame
 * @param source Source frame (read-only)
 * @param sourceNewChannel Frame providing the additional channel (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param sourcePaddingElements Padding value for `source`
 * @param sourceNewChannelPaddingElements Padding value for `sourceNewChannel`
 * @param targetPaddingElements Padding value for `target`
 * @param worker Optional Worker object, defaults to nullptr (presumably used to distribute the work)
 */
312 template <
typename T,
unsigned int tSourceChannels>
313 static inline void addFirstChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only. Judging by the name, like addFirstChannel() but the new front channel is filled with the
 * constant `newChannelValue` instead of data from a second frame — confirm against the definition.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Number of channels of the source frame
 * @param source Source frame (read-only)
 * @param newChannelValue Value passed for the new channel
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param worker Optional Worker object, defaults to nullptr
 */
329 template <
typename T,
unsigned int tSourceChannels>
330 static inline void addFirstChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only. Judging by the name, the counterpart of addFirstChannel() that appends the data of
 * `sourceNewChannel` after the existing channels — confirm against the definition.
 * @tparam T Element type shared by all frames
 * @tparam tSourceChannels Number of channels of the source frame
 * @param source Source frame (read-only)
 * @param sourceNewChannel Frame providing the additional channel (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param sourcePaddingElements Padding value for `source`
 * @param sourceNewChannelPaddingElements Padding value for `sourceNewChannel`
 * @param targetPaddingElements Padding value for `target`
 * @param worker Optional Worker object, defaults to nullptr
 */
347 template <
typename T,
unsigned int tSourceChannels>
348 static inline void addLastChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only. Judging by the name, like addLastChannel() but the appended channel is filled with the
 * constant `newChannelValue` — confirm against the definition.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Number of channels of the source frame
 * @param source Source frame (read-only)
 * @param newChannelValue Value passed for the new channel
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param worker Optional Worker object, defaults to nullptr
 */
364 template <
typename T,
unsigned int tSourceChannels>
365 static inline void addLastChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only. Judging by the name, copies the source to a target that lacks the source's first channel — confirm against the definition.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Number of channels of the source frame
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param worker Optional Worker object, defaults to nullptr
 */
382 template <
typename T,
unsigned int tSourceChannels>
383 static inline void removeFirstChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only. Judging by the name, copies the source to a target that lacks the source's last channel — confirm against the definition.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Number of channels of the source frame
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param worker Optional Worker object, defaults to nullptr
 */
400 template <
typename T,
unsigned int tSourceChannels>
401 static inline void removeLastChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only. Judging by the name and template parameters, copies channel `tSourceChannelIndex` of the source
 * into channel `tTargetChannelIndex` of the target — confirm against the definition.
 * Unlike most sibling functions, this one takes no ConversionFlag.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Number of source channels
 * @tparam tTargetChannels Number of target channels
 * @tparam tSourceChannelIndex Index of the source channel
 * @tparam tTargetChannelIndex Index of the target channel
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param worker Optional Worker object, defaults to nullptr
 */
418 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
419 static inline void copyChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only. Judging by the name, writes `value` into channel `tChannel` of every pixel of `frame` (in place) — confirm against the definition.
 * @tparam T Element type of the frame
 * @tparam tChannel Index of the channel to set
 * @tparam tChannels Number of channels of the frame
 * @param frame Frame to modify
 * @param width Frame width
 * @param height Frame height
 * @param value Value passed for the channel
 * @param framePaddingElements Padding value for `frame`
 * @param worker Optional Worker object, defaults to nullptr
 */
433 template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
434 static inline void setChannel(T* frame,
const unsigned int width,
const unsigned int height,
const T value,
const unsigned int framePaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only. Judging by the name, copies the source to the target with the channels of each pixel in reversed order — confirm against the definition.
 * @tparam T Element type of source and target
 * @tparam tChannels Number of channels of both frames
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param worker Optional Worker object, defaults to nullptr
 */
450 template <
typename T,
unsigned int tChannels>
451 static inline void reverseChannelOrder(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only. Judging by the name, rearranges the channels of each pixel as described by `tShufflePattern`.
 * NOTE(review): the encoding of `tShufflePattern` is not visible in this listing — look it up in the definition before use.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Number of source channels
 * @tparam tTargetChannels Number of target channels
 * @tparam tShufflePattern Compile-time shuffle pattern
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param worker Optional Worker object, defaults to nullptr
 */
477 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
478 static inline void shuffleChannels(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only. Judging by the name, shuffles channels like shuffleChannels() and additionally writes
 * `newChannelValue` into the last target channel — confirm against the definition.
 * NOTE(review): the encoding of `tShufflePattern` is not visible in this listing.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Number of source channels
 * @tparam tTargetChannels Number of target channels
 * @tparam tShufflePattern Compile-time shuffle pattern
 * @param source Source frame (read-only)
 * @param newChannelValue Value passed for the last channel
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param worker Optional Worker object, defaults to nullptr
 */
505 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
506 static inline void shuffleChannelsAndSetLastChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only. Converts a frame of uint16_t elements into a frame of uint8_t elements (per the parameter types).
 * NOTE(review): how the 16-bit values are reduced to 8 bit (shift, rounding, clamping) is not visible here — confirm against the definition.
 * @tparam tChannels Number of channels of both frames
 * @param source Source frame with 16-bit elements (read-only)
 * @param target Target frame with 8-bit elements
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param worker Optional Worker object, defaults to nullptr
 */
520 template <
unsigned int tChannels>
521 static inline void narrow16BitPerChannelTo8BitPerChannel(
const uint16_t* source, uint8_t* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only. Presumably calls the compile-time function `tPixelFunction(const T*, T*)` for the pixels of the frame — confirm against the definition.
 * Note that this signature has no padding parameters, unlike applyAdvancedPixelModifier().
 * @tparam T Element type of source and target
 * @tparam tChannels Number of channels of both frames
 * @tparam tPixelFunction Function pointer taking a read-only source pointer and a writable target pointer
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param worker Optional Worker object, defaults to nullptr
 */
535 template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
536 static void applyPixelModifier(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
Worker* worker =
nullptr);
/**
 * Declaration only. Variant of applyPixelModifier() with separate source/target element types, separate channel
 * counts, and explicit padding parameters; presumably applies `tPixelFunction` per pixel — confirm against the definition.
 * Note the parameter order: the paddings come before `conversionFlag` here.
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target frame
 * @tparam tSourceChannels Number of source channels
 * @tparam tTargetChannels Number of target channels
 * @tparam tPixelFunction Function pointer taking a read-only source pointer and a writable target pointer
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param worker Optional Worker object, defaults to nullptr
 */
554 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
555 static void applyAdvancedPixelModifier(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
Worker* worker =
nullptr);
/**
 * Declaration only. Presumably combines two source frames into one target frame by applying the compile-time
 * function `tOperator(const TSource0*, const TSource1*, TTarget*)` — confirm against the definition.
 * NOTE(review): the role of `TIntermediate` cannot be determined from this declaration; it does not appear in the signature.
 * @tparam TSource0 Element type of the first source frame
 * @tparam TSource1 Element type of the second source frame
 * @tparam TTarget Element type of the target frame
 * @tparam TIntermediate Additional type parameter (usage not visible here)
 * @tparam tSourceChannels Number of source channels
 * @tparam tTargetChannels Number of target channels
 * @tparam tOperator Function pointer taking two read-only source pointers and a writable target pointer
 * @param source0 First source frame (read-only)
 * @param source1 Second source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param source0PaddingElements Padding value for `source0`
 * @param source1PaddingElements Padding value for `source1`
 * @param targetPaddingElements Padding value for `target`
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param worker Optional Worker object, defaults to nullptr
 */
578 template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
579 static void applyBivariateOperator(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
Worker* worker =
nullptr);
/**
 * Declaration only. Takes a RowOperatorFunction pointer (by const reference) together with source/target frames;
 * presumably invokes that function for the rows of the frame — confirm against the definition.
 * Note that this function receives paddings, while RowOperatorFunction receives strides.
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target frame
 * @tparam tSourceChannels Number of source channels
 * @tparam tTargetChannels Number of target channels
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param rowOperatorFunction Row function to use
 * @param worker Optional Worker object, defaults to nullptr
 */
598 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
599 static void applyRowOperator(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction,
Worker* worker =
nullptr);
/**
 * Declaration only. Generic source-to-target frame transformation driven by `conversionFlag`; the exact operation is
 * not visible in this listing — confirm against the definition.
 * Unlike the sibling declarations, `worker` has no default argument here, so callers must pass it explicitly.
 * @tparam T Element type of source and target
 * @tparam tChannels Number of channels of both frames
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param worker Worker object (whether nullptr is accepted is not visible here)
 */
615 template <
typename T,
unsigned int tChannels>
616 static inline void transformGeneric(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker);
/**
 * Declaration only; in-place overload (single writable frame). Judging by the name, converts 8-bit pixel data from
 * premultiplied alpha to straight alpha — the arithmetic is not visible here, confirm against the definition.
 * @tparam tChannels Number of channels of the frame
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param frame Frame to modify
 * @param width Frame width
 * @param height Frame height
 * @param framePaddingElements Padding value for `frame`
 * @param worker Optional Worker object, defaults to nullptr
 */
629 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
630 static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t*
const frame,
const unsigned int width,
const unsigned int height,
const unsigned int framePaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; source/target overload. Judging by the name, converts 8-bit pixel data from premultiplied alpha
 * to straight alpha while copying from `source` to `target` — confirm against the definition.
 * @tparam tChannels Number of channels of both frames
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param worker Optional Worker object, defaults to nullptr
 */
645 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
646 static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; in-place overload (single writable frame). Judging by the name, converts 8-bit pixel data from
 * straight alpha to premultiplied alpha — the arithmetic is not visible here, confirm against the definition.
 * @tparam tChannels Number of channels of the frame
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param frame Frame to modify
 * @param width Frame width
 * @param height Frame height
 * @param framePaddingElements Padding value for `frame`
 * @param worker Optional Worker object, defaults to nullptr
 */
659 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
660 static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t*
const frame,
const unsigned int width,
const unsigned int height,
const unsigned int framePaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; source/target overload. Judging by the name, converts 8-bit pixel data from straight alpha to
 * premultiplied alpha while copying from `source` to `target` — confirm against the definition.
 * @tparam tChannels Number of channels of both frames
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param worker Optional Worker object, defaults to nullptr
 */
675 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
676 static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; row-level helper. Judging by the name, writes the pixels of a source row to the target row in reversed pixel order.
 * NOTE(review): whether `size` counts pixels or elements is not visible here — confirm against the definition.
 * @tparam T Element type
 * @tparam tChannels Number of channels per pixel
 * @param source Source row (read-only)
 * @param target Target row
 * @param size Size of the row
 */
686 template <
typename T,
unsigned int tChannels>
687 static void reverseRowPixelOrder(
const T* source, T* target,
const size_t size);
/**
 * Declaration only; in-place counterpart of reverseRowPixelOrder() operating on a single writable row.
 * NOTE(review): whether `size` counts pixels or elements is not visible here — confirm against the definition.
 * @tparam T Element type
 * @tparam tChannels Number of channels per pixel
 * @param data Row to modify
 * @param size Size of the row
 */
696 template <
typename T,
unsigned int tChannels>
697 static void reverseRowPixelOrderInPlace(T* data,
const size_t size);
/**
 * Declaration only; row-level helper. Judging by the name, writes each pixel of the source row to the target row with reversed channel order.
 * @tparam T Element type
 * @tparam tChannels Number of channels per pixel
 * @param source Source row (read-only)
 * @param target Target row
 * @param size Size of the row (unit not visible here)
 * @param unusedOptions Opaque pointer, defaults to nullptr; by its name it is not used
 */
708 template <
typename T,
unsigned int tChannels>
709 static void reverseRowChannelOrder(
const T* source, T* target,
const size_t size,
const void* unusedOptions =
nullptr);
/**
 * Declaration only; row-level counterpart of shuffleChannels(). Judging by the name, rearranges the channels of each
 * pixel of one row as described by `tShufflePattern` (encoding not visible in this listing).
 * @tparam T Element type
 * @tparam tSourceChannels Number of source channels
 * @tparam tTargetChannels Number of target channels
 * @tparam tShufflePattern Compile-time shuffle pattern
 * @param source Source row (read-only)
 * @param target Target row
 * @param size Size of the row (unit not visible here)
 * @param unusedOptions Opaque pointer, defaults to nullptr; by its name it is not used
 */
731 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
732 static inline void shuffleRowChannels(
const T* source, T* target,
const size_t size,
const void* unusedOptions =
nullptr);
/**
 * Declaration only; row-level counterpart of shuffleChannelsAndSetLastChannelValue().
 * NOTE(review): the value for the last channel is not a typed parameter here; presumably it is passed through
 * `options` — what `options` must point to, and whether the nullptr default is valid, needs to be confirmed in the definition.
 * @tparam T Element type
 * @tparam tSourceChannels Number of source channels
 * @tparam tTargetChannels Number of target channels
 * @tparam tShufflePattern Compile-time shuffle pattern
 * @param source Source row (read-only)
 * @param target Target row
 * @param size Size of the row (unit not visible here)
 * @param options Opaque pointer, defaults to nullptr
 */
754 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
755 static inline void shuffleRowChannelsAndSetLastChannelValue(
const T* source, T* target,
const size_t size,
const void* options =
nullptr);
/**
 * Declaration only; row-level helper. Judging by the name, converts one row from 3 channels to 1 channel (8 bit per
 * channel) using multiplication factors with 7-bit precision — confirm against the definition.
 * @tparam tUseFactorChannel0 Presumably whether the factor of channel 0 is applied — confirm
 * @tparam tUseFactorChannel1 Presumably whether the factor of channel 1 is applied — confirm
 * @tparam tUseFactorChannel2 Presumably whether the factor of channel 2 is applied — confirm
 * @param source Source row (read-only)
 * @param target Target row
 * @param size Size of the row (unit not visible here)
 * @param channelMultiplicationFactors_128 Opaque pointer to the factors; the pointee type and layout are not visible here (the `_128` suffix presumably means "scaled by 128")
 */
770 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
771 static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(
const uint8_t* source, uint8_t* target,
const size_t size,
const void* channelMultiplicationFactors_128);
/**
 * Declaration only; row-level helper. Judging by the name, converts one row from 4 channels to 1 channel (8 bit per
 * channel) using multiplication factors with 7-bit precision — confirm against the definition.
 * @tparam tUseFactorChannel0 Presumably whether the factor of channel 0 is applied — confirm
 * @tparam tUseFactorChannel1 Presumably whether the factor of channel 1 is applied — confirm
 * @tparam tUseFactorChannel2 Presumably whether the factor of channel 2 is applied — confirm
 * @tparam tUseFactorChannel3 Presumably whether the factor of channel 3 is applied — confirm
 * @param source Source row (read-only)
 * @param target Target row
 * @param size Size of the row (unit not visible here)
 * @param channelMultiplicationFactors_128 Opaque pointer to the factors; the pointee type and layout are not visible here
 */
872 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
873 static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(
const uint8_t* source, uint8_t* target,
const size_t size,
const void* channelMultiplicationFactors_128);
/**
 * Declaration only; row-level counterpart of narrow16BitPerChannelTo8BitPerChannel(): uint16_t source row, uint8_t target row.
 * NOTE(review): the narrowing rule (shift, rounding, clamping) is not visible here — confirm against the definition.
 * @tparam tChannels Number of channels per pixel
 * @param source Source row with 16-bit elements (read-only)
 * @param target Target row with 8-bit elements
 * @param size Size of the row (unit not visible here)
 * @param unusedParameters Opaque pointer, defaults to nullptr; by its name it is not used
 */
920 template <
unsigned int tChannels>
921 static void narrowRow16BitPerChannelTo8BitPerChannel(
const uint16_t* source, uint8_t* target,
const size_t size,
const void* unusedParameters =
nullptr);
/**
 * Declaration only; type-erased row function used for adding a channel.
 * NOTE(review): the layout of the `sources`/`targets` pointer arrays and the content of `options` are not visible in
 * this listing — presumably `sources` holds the source frame and the new-channel frame; confirm against the definition.
 * @tparam T Element type
 * @tparam tSourceChannels Number of source channels
 * @tparam tAddToFront Presumably true to add the channel in front, false to append it — confirm
 * @param sources Array of opaque read-only source pointers
 * @param targets Array of opaque target pointers
 * @param multipleRowIndex Row index passed by the caller (exact meaning not visible here)
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param options Opaque pointer with additional options
 */
937 template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
938 static void addChannelRow(
const void** sources,
void** targets,
const unsigned int multipleRowIndex,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const void* options);
/**
 * Declaration only; row-level helper for adding a constant-valued channel.
 * NOTE(review): presumably `channelValueParameter` points to the value (of type T) for the new channel — confirm against the definition.
 * @tparam T Element type
 * @tparam tSourceChannels Number of source channels
 * @tparam tAddToFront Presumably true to add the channel in front, false to append it — confirm
 * @param source Source row (read-only)
 * @param target Target row
 * @param size Size of the row (unit not visible here)
 * @param channelValueParameter Opaque pointer carrying the channel value
 */
951 template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
952 static void addChannelValueRow(
const T* source, T* target,
const size_t size,
const void* channelValueParameter);
/**
 * Declaration only; row-level counterpart of copyChannel(). Judging by the name and template parameters, copies
 * channel `tSourceChannelIndex` of each source pixel into channel `tTargetChannelIndex` of the target row — confirm.
 * @tparam T Element type
 * @tparam tSourceChannels Number of source channels
 * @tparam tTargetChannels Number of target channels
 * @tparam tSourceChannelIndex Index of the source channel
 * @tparam tTargetChannelIndex Index of the target channel
 * @param source Source row (read-only)
 * @param target Target row
 * @param size Size of the row (unit not visible here)
 * @param unusedParameters Opaque pointer, defaults to nullptr; by its name it is not used
 */
966 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
967 static void copyChannelRow(
const T* source, T* target,
const size_t size,
const void* unusedParameters =
nullptr);
/**
 * Declaration only. Same parameter list as separateTo1Channel() but without a compile-time channel count;
 * presumably the implementation used when the channel count is only known at runtime — confirm against the definition.
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target planes
 * @param sourceFrame Source frame (read-only)
 * @param targetFrames Array of target plane pointers
 * @param width Frame width
 * @param height Frame height
 * @param channels Runtime channel count
 * @param sourceFramePaddingElements Padding value for the source frame
 * @param targetFramesPaddingElements Array of padding values for the target planes (nullptr handling not visible here)
 */
983 template <
typename TSource,
typename TTarget>
984 static void separateTo1ChannelRuntime(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements);
/**
 * Declaration only. Same parameter list as zipChannels() but without a compile-time channel count;
 * presumably the implementation used when the channel count is only known at runtime — confirm against the definition.
 * @tparam TSource Element type of the source planes
 * @tparam TTarget Element type of the target frame
 * @param sourceFrames Array of read-only source plane pointers
 * @param targetFrame Target frame
 * @param width Frame width
 * @param height Frame height
 * @param channels Runtime channel count
 * @param sourceFramesPaddingElements Array of padding values for the source planes (nullptr handling not visible here)
 * @param targetFramePaddingElements Padding value for the target frame
 */
998 template <
typename TSource,
typename TTarget>
999 static void zipChannelsRuntime(
const TSource*
const*
const sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
/**
 * Declaration only; row-range variant of setChannel(). Presumably handles `numberRows` rows starting at `firstRow`
 * so that the work can be split across a Worker — confirm against the definition.
 * Note that there is no `height` parameter in this signature.
 * @tparam T Element type of the frame
 * @tparam tChannel Index of the channel to set
 * @tparam tChannels Number of channels of the frame
 * @param frame Frame to modify
 * @param width Frame width
 * @param value Value passed for the channel
 * @param framePaddingElements Padding value for `frame`
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1013 template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
1014 static void setChannelSubset(T* frame,
const unsigned int width,
const T value,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; row-range variant of applyPixelModifier(). Presumably handles `numberRows` rows starting at
 * `firstRow` — confirm against the definition.
 * @tparam T Element type of source and target
 * @tparam tChannels Number of channels of both frames
 * @tparam tPixelFunction Function pointer taking a read-only source pointer and a writable target pointer
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1029 template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
1030 static void applyPixelModifierSubset(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; row-range variant of applyAdvancedPixelModifier(). Presumably handles `numberRows` rows starting
 * at `firstRow` — confirm against the definition.
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target frame
 * @tparam tSourceChannels Number of source channels
 * @tparam tTargetChannels Number of target channels
 * @tparam tPixelFunction Function pointer taking a read-only source pointer and a writable target pointer
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1049 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
1050 static void applyAdvancedPixelModifierSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; row-range variant of applyBivariateOperator(). Presumably handles `numberRows` rows starting at
 * `firstRow` — confirm against the definition.
 * NOTE(review): the role of `TIntermediate` cannot be determined from this declaration; it does not appear in the signature.
 * @tparam TSource0 Element type of the first source frame
 * @tparam TSource1 Element type of the second source frame
 * @tparam TTarget Element type of the target frame
 * @tparam TIntermediate Additional type parameter (usage not visible here)
 * @tparam tSourceChannels Number of source channels
 * @tparam tTargetChannels Number of target channels
 * @tparam tOperator Function pointer taking two read-only source pointers and a writable target pointer
 * @param source0 First source frame (read-only)
 * @param source1 Second source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param source0PaddingElements Padding value for `source0`
 * @param source1PaddingElements Padding value for `source1`
 * @param targetPaddingElements Padding value for `target`
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1073 template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
1074 static void applyBivariateOperatorSubset(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; row-range variant of applyRowOperator(). Presumably handles `numberRows` rows starting at
 * `firstRow` — confirm against the definition.
 * Differences to applyRowOperator() visible in the signature: strides (not paddings) are passed, and the row function
 * is taken by value instead of by const reference.
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target frame
 * @tparam tSourceChannels Number of source channels
 * @tparam tTargetChannels Number of target channels
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param height Frame height
 * @param sourceStrideElements Stride of the source frame in elements
 * @param targetStrideElements Stride of the target frame in elements
 * @param rowOperatorFunction Row function to use
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1094 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
1095 static void applyRowOperatorSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; non-template, byte-based row-range function (all sizes are given in bytes, frames as uint8_t).
 * Presumably the worker function behind transformGeneric() — confirm against the definition.
 * NOTE(review): `RowReversePixelOrderFunction` is not declared in the visible part of this file.
 * @param source Source frame as bytes (read-only)
 * @param target Target frame as bytes
 * @param width Frame width
 * @param height Frame height
 * @param conversionFlag Value of the project type ConversionFlag (declared elsewhere; semantics not visible here)
 * @param rowReversePixelOrderFunction Function used to reverse the pixel order of a row (by its type name)
 * @param bytesPerRow Number of bytes per row
 * @param sourceStrideBytes Stride of the source frame in bytes
 * @param targetStrideBytes Stride of the target frame in bytes
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1111 static void transformGenericSubset(
const uint8_t* source, uint8_t* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const RowReversePixelOrderFunction<void> rowReversePixelOrderFunction,
const unsigned int bytesPerRow,
const unsigned int sourceStrideBytes,
const unsigned int targetStrideBytes,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; in-place, row-range variant of premultipliedAlphaToStraightAlpha8BitPerChannel().
 * Presumably handles `numberRows` rows starting at `firstRow` — confirm against the definition.
 * @tparam tChannels Number of channels of the frame
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param frame Frame to modify
 * @param width Frame width
 * @param framePaddingElements Padding value for `frame`
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1123 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1124 static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t*
const frame,
const unsigned int width,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; source/target, row-range variant of premultipliedAlphaToStraightAlpha8BitPerChannel().
 * Presumably handles `numberRows` rows starting at `firstRow` — confirm against the definition.
 * @tparam tChannels Number of channels of both frames
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1138 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1139 static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; in-place, row-range variant of straightAlphaToPremultipliedAlpha8BitPerChannel().
 * Presumably handles `numberRows` rows starting at `firstRow` — confirm against the definition.
 * @tparam tChannels Number of channels of the frame
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param frame Frame to modify
 * @param width Frame width
 * @param framePaddingElements Padding value for `frame`
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1151 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1152 static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t*
const frame,
const unsigned int width,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; source/target, row-range variant of straightAlphaToPremultipliedAlpha8BitPerChannel().
 * Presumably handles `numberRows` rows starting at `firstRow` — confirm against the definition.
 * @tparam tChannels Number of channels of both frames
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param source Source frame (read-only)
 * @param target Target frame
 * @param width Frame width
 * @param sourcePaddingElements Padding value for `source`
 * @param targetPaddingElements Padding value for `target`
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1166 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1167 static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
1169#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
/**
 * Declaration only; SSE helper listed after the `OCEAN_HARDWARE_SSE_VERSION >= 41` preprocessor check.
 * Judging by the name, converts 16 pixels from 3 channels to 1 channel (8 bit per channel) with 7-bit precision factors — confirm.
 * @param source Source pixels (read-only)
 * @param target Target pixels
 * @param multiplicationFactors0_128_u_16x8 128-bit factor vector for channel 0; the suffix presumably reads "scaled by 128, unsigned, 16 bit x 8 lanes" — confirm
 * @param multiplicationFactors1_128_u_16x8 128-bit factor vector for channel 1 (same presumed layout)
 * @param multiplicationFactors2_128_u_16x8 128-bit factor vector for channel 2 (same presumed layout)
 */
1182 static OCEAN_FORCE_INLINE
void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& multiplicationFactors0_128_u_16x8,
const __m128i& multiplicationFactors1_128_u_16x8,
const __m128i& multiplicationFactors2_128_u_16x8);
/**
 * Declaration only; SSE helper. Judging by the name, converts 16 pixels from 3 channels to 3 channels (8 bit per
 * channel) with 7-bit precision factors plus a per-channel bias — confirm against the definition.
 * The nine `factorChannel<i><j>` vectors presumably form a 3x3 channel-mixing matrix; which index is the source and
 * which the target channel is not visible here. Suffixes presumably read "scaled by 128, signed, 16 bit x 8 lanes".
 * @param source Source pixels (read-only)
 * @param target Target pixels
 * @param factorChannel00_128_s_16x8 Factor vector (see above)
 * @param factorChannel10_128_s_16x8 Factor vector (see above)
 * @param factorChannel20_128_s_16x8 Factor vector (see above)
 * @param factorChannel01_128_s_16x8 Factor vector (see above)
 * @param factorChannel11_128_s_16x8 Factor vector (see above)
 * @param factorChannel21_128_s_16x8 Factor vector (see above)
 * @param factorChannel02_128_s_16x8 Factor vector (see above)
 * @param factorChannel12_128_s_16x8 Factor vector (see above)
 * @param factorChannel22_128_s_16x8 Factor vector (see above)
 * @param biasChannel0_s_16x8 Bias vector for channel 0
 * @param biasChannel1_s_16x8 Bias vector for channel 1
 * @param biasChannel2_s_16x8 Bias vector for channel 2
 */
1211 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_128_s_16x8,
const __m128i& factorChannel10_128_s_16x8,
const __m128i& factorChannel20_128_s_16x8,
const __m128i& factorChannel01_128_s_16x8,
const __m128i& factorChannel11_128_s_16x8,
const __m128i& factorChannel21_128_s_16x8,
const __m128i& factorChannel02_128_s_16x8,
const __m128i& factorChannel12_128_s_16x8,
const __m128i& factorChannel22_128_s_16x8,
const __m128i& biasChannel0_s_16x8,
const __m128i& biasChannel1_s_16x8,
const __m128i& biasChannel2_s_16x8);
/**
 * Declaration only; SSE helper. Judging by the name, the 10-bit precision counterpart of the 7-bit
 * 3-channel-to-3-channel conversion for 16 pixels — confirm against the definition.
 * The nine `factorChannel<i><j>` vectors presumably form a 3x3 channel-mixing matrix (index order not visible here);
 * suffixes presumably read "scaled by 1024, signed, 16 bit x 8 lanes" for factors and "32 bit x 4 lanes" for biases.
 * @param source Source pixels (read-only)
 * @param target Target pixels
 * @param factorChannel00_1024_s_16x8 Factor vector (see above)
 * @param factorChannel10_1024_s_16x8 Factor vector (see above)
 * @param factorChannel20_1024_s_16x8 Factor vector (see above)
 * @param factorChannel01_1024_s_16x8 Factor vector (see above)
 * @param factorChannel11_1024_s_16x8 Factor vector (see above)
 * @param factorChannel21_1024_s_16x8 Factor vector (see above)
 * @param factorChannel02_1024_s_16x8 Factor vector (see above)
 * @param factorChannel12_1024_s_16x8 Factor vector (see above)
 * @param factorChannel22_1024_s_16x8 Factor vector (see above)
 * @param biasChannel0_1024_s_32x4 Bias vector for channel 0
 * @param biasChannel1_1024_s_32x4 Bias vector for channel 1
 * @param biasChannel2_1024_s_32x4 Bias vector for channel 2
 */
1240 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_1024_s_16x8,
const __m128i& factorChannel10_1024_s_16x8,
const __m128i& factorChannel20_1024_s_16x8,
const __m128i& factorChannel01_1024_s_16x8,
const __m128i& factorChannel11_1024_s_16x8,
const __m128i& factorChannel21_1024_s_16x8,
const __m128i& factorChannel02_1024_s_16x8,
const __m128i& factorChannel12_1024_s_16x8,
const __m128i& factorChannel22_1024_s_16x8,
const __m128i& biasChannel0_1024_s_32x4,
const __m128i& biasChannel1_1024_s_32x4,
const __m128i& biasChannel2_1024_s_32x4);
/**
 * Declaration only; SSE helper. Judging by the name, converts 16 pixels from 4 channels to 1 channel (8 bit per
 * channel) with 7-bit precision factors — confirm against the definition.
 * NOTE(review): the parameter name ends in `_32x` without a lane count, unlike the sibling parameters (`_16x8`,
 * `_32x4`) — possibly a truncated name; the actual lane layout should be checked in the definition.
 * @param source Source pixels (read-only)
 * @param target Target pixels
 * @param multiplicationFactors0123_128_s_32x Single 128-bit vector carrying the factors of all four channels (by its name)
 */
1251 static OCEAN_FORCE_INLINE
void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& multiplicationFactors0123_128_s_32x);
/**
 * Declaration only; SSE helper. Judging by the name, converts 16 pixels from 4 channels to 2 channels (8 bit per
 * channel) with 7-bit precision factors — confirm against the definition.
 * @param source Source pixels (read-only)
 * @param target Target pixels
 * @param multiplicationFactorsChannel0_0123_128_s_16x8 Factor vector for target channel 0 covering source channels 0-3 (by its name; layout to be confirmed)
 * @param multiplicationFactorsChannel1_0123_128_s_16x8 Factor vector for target channel 1 covering source channels 0-3 (by its name; layout to be confirmed)
 */
1263 static OCEAN_FORCE_INLINE
void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& multiplicationFactorsChannel0_0123_128_s_16x8,
const __m128i& multiplicationFactorsChannel1_0123_128_s_16x8);
1267#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
/**
 * Declaration only; NEON helper listed after the `OCEAN_HARDWARE_NEON_VERSION >= 10` preprocessor check.
 * Judging by the name, converts 8 pixels from 3 channels to 3 channels (8 bit per channel) with 6-bit precision
 * factors plus a per-channel bias — confirm against the definition.
 * NOTE(review): in this listing a `template <bool, bool, bool>` header directly precedes this declaration, but none
 * of its parameters is used in the signature — a declaration may be missing between the header and this function.
 * The nine `factorChannel<i><j>` vectors presumably form a 3x3 channel-mixing matrix (index order not visible here);
 * suffixes presumably read "scaled by 64, signed, 16 bit x 8 lanes".
 */
1283 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
1313 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8);
/**
 * Declaration only; NEON helper. Judging by the name, the 16-pixel counterpart of the 8-pixel 3-channel-to-3-channel
 * conversion with 6-bit precision — confirm against the definition.
 * The nine `factorChannel<i><j>` vectors (eight int16 lanes each) presumably form a 3x3 channel-mixing matrix scaled
 * by 64; the three bias vectors hold eight uint8 lanes each. Index order of the factors is not visible here.
 * @param source Source pixels (read-only)
 * @param target Target pixels
 */
1342 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8);
/**
 * Declaration only; NEON helper. Judging by the name, converts 8 pixels from 3 channels to 3 channels (8 bit per
 * channel) with 7-bit precision factors plus a per-channel bias — confirm against the definition.
 * The nine `factorChannel<i><j>` vectors and the three bias vectors hold eight int16 lanes each; the factors
 * presumably form a 3x3 channel-mixing matrix scaled by 128 (index order not visible here).
 * @param source Source pixels (read-only)
 * @param target Target pixels
 */
1371 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8);
/**
 * Declaration only; NEON helper. Judging by the name, converts 8 pixels from 3 channels to 3 channels (8 bit per
 * channel) with 10-bit precision factors plus a per-channel bias — confirm against the definition.
 * The nine `factorChannel<i><j>` vectors hold four int16 lanes each and the three bias vectors four int32 lanes each;
 * the factors presumably form a 3x3 channel-mixing matrix scaled by 1024 (index order not visible here).
 * @param source Source pixels (read-only)
 * @param target Target pixels
 */
1400 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4);
/**
 * Declaration only; NEON helper. Judging by the name, the 16-pixel counterpart of the 8-pixel 3-channel-to-3-channel
 * conversion with 10-bit precision — confirm against the definition.
 * The nine `factorChannel<i><j>` vectors hold four int16 lanes each and the three bias vectors four int32 lanes each;
 * the factors presumably form a 3x3 channel-mixing matrix scaled by 1024 (index order not visible here).
 * @param source Source pixels (read-only)
 * @param target Target pixels
 */
1429 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4);
/**
 * Declaration only; NEON helper. Judging by the name, the 16-pixel counterpart of the 8-pixel 3-channel-to-3-channel
 * conversion with 7-bit precision — confirm against the definition.
 * The nine `factorChannel<i><j>` vectors and the three bias vectors hold eight int16 lanes each; the factors
 * presumably form a 3x3 channel-mixing matrix scaled by 128 (index order not visible here).
 * @param source Source pixels (read-only)
 * @param target Target pixels
 */
1458 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8);
/**
 * Declaration only; NEON helper. Judging by the name, converts 16 pixels from 3 channels to 4 channels (8 bit per
 * channel) with 6-bit precision factors and a per-channel bias; the extra parameter `channelValue3_u_8x16`
 * (sixteen uint8 lanes) presumably provides the value of the fourth target channel — confirm against the definition.
 * The nine `factorChannel<i><j>` vectors presumably form a 3x3 channel-mixing matrix scaled by 64 (index order not visible here).
 * @param source Source pixels (read-only)
 * @param target Target pixels
 */
1490 static OCEAN_FORCE_INLINE
void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8,
const uint8x16_t& channelValue3_u_8x16);
/**
 * Declaration only; NEON helper. Judging by the name, converts 8 pixels from 4 channels to 2 channels (8 bit per
 * channel) with 7-bit precision factors — confirm against the definition.
 * NOTE(review): in this listing a `template <bool, bool, bool, bool>` header directly precedes this declaration, but
 * none of its parameters is used in the signature — a declaration may be missing between the header and this function.
 * The eight `factorChannel<i><j>` vectors hold eight uint8 lanes each, presumably scaled by 128; which index is the
 * source and which the target channel is not visible here.
 */
1508 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
1527 static OCEAN_FORCE_INLINE
void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const uint8x8_t& factorChannel00_128_u_8x8,
const uint8x8_t& factorChannel10_128_u_8x8,
const uint8x8_t& factorChannel01_128_u_8x8,
const uint8x8_t& factorChannel11_128_u_8x8,
const uint8x8_t& factorChannel02_128_u_8x8,
const uint8x8_t& factorChannel12_128_u_8x8,
const uint8x8_t& factorChannel03_128_u_8x8,
const uint8x8_t& factorChannel13_128_u_8x8);
1533#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON implementation of separateTo1Channel() for uint8_t source/target data with exactly two channels.
// Splits the interleaved source (c0 c1 c0 c1 ...) into targetFrames[0] and targetFrames[1]: 16 pixels per vector
// iteration, followed by a scalar loop for the leftover pixels.
// NOTE(review): brace-only lines, the `else` keyword and the `template <>` introducer are not visible in this
// listing. The two regions below that each declare `blocks`/`remaining` must live in separate scopes — presumably
// the two branches of the `if` on padding; confirm against the full file.
1536inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 2u>(
const uint8_t*
const sourceFrame, uint8_t*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
// Precondition checks via ocean_assert (defined elsewhere; presumably a debug-only assertion).
1538 ocean_assert(sourceFrame !=
nullptr);
1539 ocean_assert(targetFrames !=
nullptr);
1541 ocean_assert(width != 0u && height != 0u);
// The runtime channel count must match the channel count this specialization is written for.
1542 ocean_assert(channels == 2u);
1544 constexpr unsigned int tChannels = 2u;
// Determine whether every target plane is free of row padding; a null padding array leaves the flag at true.
1546 bool allTargetFramesContinuous =
true;
1548 if (targetFramesPaddingElements !=
nullptr)
1550 for (
unsigned int n = 0u; n < tChannels; ++n)
1552 if (targetFramesPaddingElements[n] != 0u)
1554 allTargetFramesContinuous =
false;
// Running pointers into the source frame and the two target planes.
1560 const uint8_t* source = sourceFrame;
1561 uint8_t* target0 = targetFrames[0];
1562 uint8_t* target1 = targetFrames[1];
// Number of pixels handled per vector iteration (one 16-byte vector per target plane).
1564 constexpr unsigned int tBlockSize = 16u;
1566 uint8x16x2_t source_8x16x2;
// No padding anywhere: the frame is handled as one continuous run of width * height pixels.
// NOTE(review): `width * height` is an unsigned int product and would wrap for extremely large frames.
1568 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1570 const unsigned int pixels = width * height;
1571 const unsigned int blocks = pixels / tBlockSize;
1572 const unsigned int remaining = pixels % tBlockSize;
1574 for (
unsigned int n = 0u; n < blocks; ++n)
// vld2q_u8 loads 32 interleaved bytes and de-interleaves them into two 16-byte vectors (val[0], val[1]).
1576 source_8x16x2 = vld2q_u8(source);
1578 vst1q_u8(target0, source_8x16x2.val[0]);
1579 vst1q_u8(target1, source_8x16x2.val[1]);
1581 source += tBlockSize * tChannels;
1583 target0 += tBlockSize;
1584 target1 += tBlockSize;
// Scalar handling of the pixels that do not fill a complete 16-pixel block.
1587 for (
unsigned int n = 0u; n < remaining; ++n)
1589 target0[n] = source[n * tChannels + 0u];
1590 target1[n] = source[n * tChannels + 1u];
// Row-by-row handling (presumably the `else` branch, used when any padding is present).
// A null padding array is treated as zero padding for each target plane.
1595 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
1596 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
1598 const unsigned int blocks = width / tBlockSize;
1599 const unsigned int remaining = width % tBlockSize;
1601 for (
unsigned int y = 0u; y < height; ++y)
1603 for (
unsigned int n = 0u; n < blocks; ++n)
1605 source_8x16x2 = vld2q_u8(source);
1607 vst1q_u8(target0, source_8x16x2.val[0]);
1608 vst1q_u8(target1, source_8x16x2.val[1]);
1610 source += tBlockSize * tChannels;
1612 target0 += tBlockSize;
1613 target1 += tBlockSize;
// Scalar tail of the row; it indexes relative to the pointers without advancing them.
1616 for (
unsigned int n = 0u; n < remaining; ++n)
1618 target0[n] = source[n * tChannels + 0u];
1619 target1[n] = source[n * tChannels + 1u];
// Advance past the tail pixels and the row padding to reach the start of the next row.
1622 source += remaining * tChannels + sourceFramePaddingElements;
1623 target0 += remaining + targetFrame0PaddingElements;
1624 target1 += remaining + targetFrame1PaddingElements;
// NEON implementation of separateTo1Channel() for uint8_t source/target data with exactly three channels.
// Splits the interleaved source (c0 c1 c2 c0 c1 c2 ...) into targetFrames[0..2]: 16 pixels per vector iteration,
// followed by a scalar loop for the leftover pixels.
// NOTE(review): brace-only lines, the `else` keyword and the `template <>` introducer are not visible in this
// listing. The two regions below that each declare `blocks`/`remaining` must live in separate scopes — presumably
// the two branches of the `if` on padding; confirm against the full file.
1630inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 3u>(
const uint8_t*
const sourceFrame, uint8_t*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
// Precondition checks via ocean_assert (defined elsewhere; presumably a debug-only assertion).
1632 ocean_assert(sourceFrame !=
nullptr);
1633 ocean_assert(targetFrames !=
nullptr);
1635 ocean_assert(width != 0u && height != 0u);
// The runtime channel count must match the channel count this specialization is written for.
1636 ocean_assert(channels == 3u);
1638 constexpr unsigned int tChannels = 3u;
// Determine whether every target plane is free of row padding; a null padding array leaves the flag at true.
1640 bool allTargetFramesContinuous =
true;
1642 if (targetFramesPaddingElements !=
nullptr)
1644 for (
unsigned int n = 0u; n < tChannels; ++n)
1646 if (targetFramesPaddingElements[n] != 0u)
1648 allTargetFramesContinuous =
false;
// Running pointers into the source frame and the three target planes.
1654 const uint8_t* source = sourceFrame;
1655 uint8_t* target0 = targetFrames[0];
1656 uint8_t* target1 = targetFrames[1];
1657 uint8_t* target2 = targetFrames[2];
// Number of pixels handled per vector iteration (one 16-byte vector per target plane).
1659 constexpr unsigned int tBlockSize = 16u;
1661 uint8x16x3_t source_8x16x3;
// No padding anywhere: the frame is handled as one continuous run of width * height pixels.
// NOTE(review): `width * height` is an unsigned int product and would wrap for extremely large frames.
1663 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1665 const unsigned int pixels = width * height;
1666 const unsigned int blocks = pixels / tBlockSize;
1667 const unsigned int remaining = pixels % tBlockSize;
1669 for (
unsigned int n = 0u; n < blocks; ++n)
// vld3q_u8 loads 48 interleaved bytes and de-interleaves them into three 16-byte vectors (val[0..2]).
1671 source_8x16x3 = vld3q_u8(source);
1673 vst1q_u8(target0, source_8x16x3.val[0]);
1674 vst1q_u8(target1, source_8x16x3.val[1]);
1675 vst1q_u8(target2, source_8x16x3.val[2]);
1677 source += tBlockSize * tChannels;
1679 target0 += tBlockSize;
1680 target1 += tBlockSize;
1681 target2 += tBlockSize;
// Scalar handling of the pixels that do not fill a complete 16-pixel block.
1684 for (
unsigned int n = 0u; n < remaining; ++n)
1686 target0[n] = source[n * tChannels + 0u];
1687 target1[n] = source[n * tChannels + 1u];
1688 target2[n] = source[n * tChannels + 2u];
// Row-by-row handling (presumably the `else` branch, used when any padding is present).
// A null padding array is treated as zero padding for each target plane.
1693 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
1694 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
1695 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[2];
1697 const unsigned int blocks = width / tBlockSize;
1698 const unsigned int remaining = width % tBlockSize;
1700 for (
unsigned int y = 0u; y < height; ++y)
1702 for (
unsigned int n = 0u; n < blocks; ++n)
1704 source_8x16x3 = vld3q_u8(source);
1706 vst1q_u8(target0, source_8x16x3.val[0]);
1707 vst1q_u8(target1, source_8x16x3.val[1]);
1708 vst1q_u8(target2, source_8x16x3.val[2]);
1710 source += tBlockSize * tChannels;
1712 target0 += tBlockSize;
1713 target1 += tBlockSize;
1714 target2 += tBlockSize;
// Scalar tail of the row; it indexes relative to the pointers without advancing them.
1717 for (
unsigned int n = 0u; n < remaining; ++n)
1719 target0[n] = source[n * tChannels + 0u];
1720 target1[n] = source[n * tChannels + 1u];
1721 target2[n] = source[n * tChannels + 2u];
// Advance past the tail pixels and the row padding to reach the start of the next row.
1724 source += remaining * tChannels + sourceFramePaddingElements;
1725 target0 += remaining + targetFrame0PaddingElements;
1726 target1 += remaining + targetFrame1PaddingElements;
1727 target2 += remaining + targetFrame2PaddingElements;
// NEON implementation of separateTo1Channel() for uint8_t source/target data with exactly four channels.
// Splits the interleaved source (c0 c1 c2 c3 c0 c1 c2 c3 ...) into targetFrames[0..3]: 16 pixels per vector
// iteration, followed by a scalar loop for the leftover pixels.
// NOTE(review): brace-only lines, the `else` keyword and the `template <>` introducer are not visible in this
// listing. The two regions below that each declare `blocks`/`remaining` must live in separate scopes — presumably
// the two branches of the `if` on padding; confirm against the full file.
1733inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 4u>(
const uint8_t*
const sourceFrame, uint8_t*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
// Precondition checks via ocean_assert (defined elsewhere; presumably a debug-only assertion).
1735 ocean_assert(sourceFrame !=
nullptr);
1736 ocean_assert(targetFrames !=
nullptr);
1738 ocean_assert(width != 0u && height != 0u);
// The runtime channel count must match the channel count this specialization is written for.
1739 ocean_assert(channels == 4u);
1741 constexpr unsigned int tChannels = 4u;
// Determine whether every target plane is free of row padding; a null padding array leaves the flag at true.
1743 bool allTargetFramesContinuous =
true;
1745 if (targetFramesPaddingElements !=
nullptr)
1747 for (
unsigned int n = 0u; n < tChannels; ++n)
1749 if (targetFramesPaddingElements[n] != 0u)
1751 allTargetFramesContinuous =
false;
// Running pointers into the source frame and the four target planes.
1757 const uint8_t* source = sourceFrame;
1758 uint8_t* target0 = targetFrames[0];
1759 uint8_t* target1 = targetFrames[1];
1760 uint8_t* target2 = targetFrames[2];
1761 uint8_t* target3 = targetFrames[3];
// Number of pixels handled per vector iteration (one 16-byte vector per target plane).
1763 constexpr unsigned int tBlockSize = 16u;
1765 uint8x16x4_t source_8x16x4;
// No padding anywhere: the frame is handled as one continuous run of width * height pixels.
// NOTE(review): `width * height` is an unsigned int product and would wrap for extremely large frames.
1767 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1769 const unsigned int pixels = width * height;
1770 const unsigned int blocks = pixels / tBlockSize;
1771 const unsigned int remaining = pixels % tBlockSize;
1773 for (
unsigned int n = 0u; n < blocks; ++n)
// vld4q_u8 loads 64 interleaved bytes and de-interleaves them into four 16-byte vectors (val[0..3]).
1775 source_8x16x4 = vld4q_u8(source);
1777 vst1q_u8(target0, source_8x16x4.val[0]);
1778 vst1q_u8(target1, source_8x16x4.val[1]);
1779 vst1q_u8(target2, source_8x16x4.val[2]);
1780 vst1q_u8(target3, source_8x16x4.val[3]);
1782 source += tBlockSize * tChannels;
1784 target0 += tBlockSize;
1785 target1 += tBlockSize;
1786 target2 += tBlockSize;
1787 target3 += tBlockSize;
// Scalar handling of the pixels that do not fill a complete 16-pixel block.
1790 for (
unsigned int n = 0u; n < remaining; ++n)
1792 target0[n] = source[n * tChannels + 0u];
1793 target1[n] = source[n * tChannels + 1u];
1794 target2[n] = source[n * tChannels + 2u];
1795 target3[n] = source[n * tChannels + 3u];
// Row-by-row handling (presumably the `else` branch, used when any padding is present).
// A null padding array is treated as zero padding for each target plane.
1800 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
1801 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
1802 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[2];
1803 const unsigned int targetFrame3PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[3];
1805 const unsigned int blocks = width / tBlockSize;
1806 const unsigned int remaining = width % tBlockSize;
1808 for (
unsigned int y = 0u; y < height; ++y)
1810 for (
unsigned int n = 0u; n < blocks; ++n)
1812 source_8x16x4 = vld4q_u8(source);
1814 vst1q_u8(target0, source_8x16x4.val[0]);
1815 vst1q_u8(target1, source_8x16x4.val[1]);
1816 vst1q_u8(target2, source_8x16x4.val[2]);
1817 vst1q_u8(target3, source_8x16x4.val[3]);
1819 source += tBlockSize * tChannels;
1821 target0 += tBlockSize;
1822 target1 += tBlockSize;
1823 target2 += tBlockSize;
1824 target3 += tBlockSize;
// Scalar tail of the row; it indexes relative to the pointers without advancing them.
1827 for (
unsigned int n = 0u; n < remaining; ++n)
1829 target0[n] = source[n * tChannels + 0u];
1830 target1[n] = source[n * tChannels + 1u];
1831 target2[n] = source[n * tChannels + 2u];
1832 target3[n] = source[n * tChannels + 3u];
// Advance past the tail pixels and the row padding to reach the start of the next row.
1835 source += remaining * tChannels + sourceFramePaddingElements;
1836 target0 += remaining + targetFrame0PaddingElements;
1837 target1 += remaining + targetFrame1PaddingElements;
1838 target2 += remaining + targetFrame2PaddingElements;
1839 target3 += remaining + targetFrame3PaddingElements;
1846template <
typename TSource,
typename TTarget,
unsigned int tChannels>
1847void FrameChannels::separateTo1Channel(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
1849 ocean_assert(sourceFrame !=
nullptr);
1850 ocean_assert(targetFrames !=
nullptr);
1852 ocean_assert(width != 0u && height != 0u);
1858 separateTo1ChannelRuntime<TSource, TTarget>(sourceFrame, targetFrames, width, height, channels, sourceFramePaddingElements, targetFramesPaddingElements);
1863 for (
unsigned int c = 0u; c < tChannels; ++c)
1865 ocean_assert(targetFrames[c] !=
nullptr);
1869 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements ==
nullptr)
1871 for (
unsigned int n = 0u; n < width * height; ++n)
1873 for (
unsigned int c = 0u; c < tChannels; ++c)
1875 targetFrames[c][n] = TTarget(sourceFrame[n * tChannels + c]);
1879 else if (targetFramesPaddingElements ==
nullptr)
1881 ocean_assert(sourceFramePaddingElements != 0u);
1883 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
1885 for (
unsigned int y = 0u; y < height; ++y)
1887 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
1889 const unsigned int targetRowOffset = y * width;
1891 for (
unsigned int x = 0u; x < width; ++x)
1893 for (
unsigned int c = 0u; c < tChannels; ++c)
1895 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * tChannels + c));
1902 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
1904 Indices32 targetFrameStrideElements(tChannels);
1906 for (
unsigned int c = 0u; c < tChannels; ++c)
1908 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
1911 for (
unsigned int y = 0u; y < height; ++y)
1913 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
1915 for (
unsigned int x = 0u; x < width; ++x)
1917 for (
unsigned int c = 0u; c < tChannels; ++c)
1919 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * tChannels + c));
1926template <
typename TSource,
typename TTarget>
1927void FrameChannels::separateTo1Channel(
const TSource*
const sourceFrame,
const std::initializer_list<TTarget*>& targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int sourceFramePaddingElements,
const std::initializer_list<const unsigned int>& targetFramesPaddingElements)
1929 ocean_assert(targetFrames.size() >= 1);
1930 ocean_assert(targetFramesPaddingElements.size() == 0 || targetFrames.size() == targetFramesPaddingElements.size());
1932 if (targetFrames.size() == 2)
1934 separateTo1Channel<TSource, TTarget, 2u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
1936 else if (targetFrames.size() == 3)
1938 separateTo1Channel<TSource, TTarget, 3u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
1940 else if (targetFrames.size() == 4)
1942 separateTo1Channel<TSource, TTarget, 4u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
1946 separateTo1Channel<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
1950#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1953inline void FrameChannels::zipChannels<uint8_t, uint8_t, 2u>(
const uint8_t*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
1955 ocean_assert(sourceFrames !=
nullptr);
1956 ocean_assert(targetFrame !=
nullptr);
1958 ocean_assert(width != 0u && height != 0u);
1959 ocean_assert(channels == 2u);
1961 constexpr unsigned int tChannels = 2u;
1963 bool allSourceFramesContinuous =
true;
1965 if (sourceFramesPaddingElements !=
nullptr)
1967 for (
unsigned int n = 0u; n < tChannels; ++n)
1969 if (sourceFramesPaddingElements[n] != 0u)
1971 allSourceFramesContinuous =
false;
1977 const uint8_t* source0 = sourceFrames[0];
1978 const uint8_t* source1 = sourceFrames[1];
1979 uint8_t* target = targetFrame;
1981 constexpr unsigned int tBlockSize = 16u;
1983 uint8x16x2_t source_8x16x2;
1985 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1987 const unsigned int pixels = width * height;
1988 const unsigned int blocks = pixels / tBlockSize;
1989 const unsigned int remaining = pixels % tBlockSize;
1991 for (
unsigned int n = 0u; n < blocks; ++n)
1993 source_8x16x2.val[0] = vld1q_u8(source0);
1994 source_8x16x2.val[1] = vld1q_u8(source1);
1996 vst2q_u8(target, source_8x16x2);
1998 source0 += tBlockSize;
1999 source1 += tBlockSize;
2001 target += tBlockSize * tChannels;
2004 for (
unsigned int n = 0u; n < remaining; ++n)
2006 target[n * tChannels + 0u] = source0[n];
2007 target[n * tChannels + 1u] = source1[n];
2012 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2013 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2015 const unsigned int blocks = width / tBlockSize;
2016 const unsigned int remaining = width % tBlockSize;
2018 for (
unsigned int y = 0u; y < height; ++y)
2020 for (
unsigned int n = 0u; n < blocks; ++n)
2022 source_8x16x2.val[0] = vld1q_u8(source0);
2023 source_8x16x2.val[1] = vld1q_u8(source1);
2025 vst2q_u8(target, source_8x16x2);
2027 source0 += tBlockSize;
2028 source1 += tBlockSize;
2030 target += tBlockSize * tChannels;
2033 for (
unsigned int n = 0u; n < remaining; ++n)
2035 target[n * tChannels + 0u] = source0[n];
2036 target[n * tChannels + 1u] = source1[n];
2039 source0 += remaining + sourceFrame0PaddingElements;
2040 source1 += remaining + sourceFrame1PaddingElements;
2041 target += remaining * tChannels + targetFramePaddingElements;
2047inline void FrameChannels::zipChannels<uint8_t, uint8_t, 3u>(
const uint8_t*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2049 ocean_assert(sourceFrames !=
nullptr);
2050 ocean_assert(targetFrame !=
nullptr);
2052 ocean_assert(width != 0u && height != 0u);
2053 ocean_assert(channels == 3u);
2055 constexpr unsigned int tChannels = 3u;
2057 bool allSourceFramesContinuous =
true;
2059 if (sourceFramesPaddingElements !=
nullptr)
2061 for (
unsigned int n = 0u; n < tChannels; ++n)
2063 if (sourceFramesPaddingElements[n] != 0u)
2065 allSourceFramesContinuous =
false;
2071 const uint8_t* source0 = sourceFrames[0];
2072 const uint8_t* source1 = sourceFrames[1];
2073 const uint8_t* source2 = sourceFrames[2];
2074 uint8_t* target = targetFrame;
2076 constexpr unsigned int tBlockSize = 16u;
2078 uint8x16x3_t source_8x16x3;
2080 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2082 const unsigned int pixels = width * height;
2083 const unsigned int blocks = pixels / tBlockSize;
2084 const unsigned int remaining = pixels % tBlockSize;
2086 for (
unsigned int n = 0u; n < blocks; ++n)
2088 source_8x16x3.val[0] = vld1q_u8(source0);
2089 source_8x16x3.val[1] = vld1q_u8(source1);
2090 source_8x16x3.val[2] = vld1q_u8(source2);
2092 vst3q_u8(target, source_8x16x3);
2094 source0 += tBlockSize;
2095 source1 += tBlockSize;
2096 source2 += tBlockSize;
2098 target += tBlockSize * tChannels;
2101 for (
unsigned int n = 0u; n < remaining; ++n)
2103 target[n * tChannels + 0u] = source0[n];
2104 target[n * tChannels + 1u] = source1[n];
2105 target[n * tChannels + 2u] = source2[n];
2110 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2111 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2112 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
2114 const unsigned int blocks = width / tBlockSize;
2115 const unsigned int remaining = width % tBlockSize;
2117 for (
unsigned int y = 0u; y < height; ++y)
2119 for (
unsigned int n = 0u; n < blocks; ++n)
2121 source_8x16x3.val[0] = vld1q_u8(source0);
2122 source_8x16x3.val[1] = vld1q_u8(source1);
2123 source_8x16x3.val[2] = vld1q_u8(source2);
2125 vst3q_u8(target, source_8x16x3);
2127 source0 += tBlockSize;
2128 source1 += tBlockSize;
2129 source2 += tBlockSize;
2131 target += tBlockSize * tChannels;
2134 for (
unsigned int n = 0u; n < remaining; ++n)
2136 target[n * tChannels + 0u] = source0[n];
2137 target[n * tChannels + 1u] = source1[n];
2138 target[n * tChannels + 2u] = source2[n];
2141 source0 += remaining + sourceFrame0PaddingElements;
2142 source1 += remaining + sourceFrame1PaddingElements;
2143 source2 += remaining + sourceFrame2PaddingElements;
2144 target += remaining * tChannels + targetFramePaddingElements;
2150inline void FrameChannels::zipChannels<uint8_t, uint8_t, 4u>(
const uint8_t*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2152 ocean_assert(sourceFrames !=
nullptr);
2153 ocean_assert(targetFrame !=
nullptr);
2155 ocean_assert(width != 0u && height != 0u);
2156 ocean_assert(channels == 4u);
2158 constexpr unsigned int tChannels = 4u;
2160 bool allSourceFramesContinuous =
true;
2162 if (sourceFramesPaddingElements !=
nullptr)
2164 for (
unsigned int n = 0u; n < tChannels; ++n)
2166 if (sourceFramesPaddingElements[n] != 0u)
2168 allSourceFramesContinuous =
false;
2174 const uint8_t* source0 = sourceFrames[0];
2175 const uint8_t* source1 = sourceFrames[1];
2176 const uint8_t* source2 = sourceFrames[2];
2177 const uint8_t* source3 = sourceFrames[3];
2178 uint8_t* target = targetFrame;
2180 constexpr unsigned int tBlockSize = 16u;
2182 uint8x16x4_t source_8x16x4;
2184 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2186 const unsigned int pixels = width * height;
2187 const unsigned int blocks = pixels / tBlockSize;
2188 const unsigned int remaining = pixels % tBlockSize;
2190 for (
unsigned int n = 0u; n < blocks; ++n)
2192 source_8x16x4.val[0] = vld1q_u8(source0);
2193 source_8x16x4.val[1] = vld1q_u8(source1);
2194 source_8x16x4.val[2] = vld1q_u8(source2);
2195 source_8x16x4.val[3] = vld1q_u8(source3);
2197 vst4q_u8(target, source_8x16x4);
2199 source0 += tBlockSize;
2200 source1 += tBlockSize;
2201 source2 += tBlockSize;
2202 source3 += tBlockSize;
2204 target += tBlockSize * tChannels;
2207 for (
unsigned int n = 0u; n < remaining; ++n)
2209 target[n * tChannels + 0u] = source0[n];
2210 target[n * tChannels + 1u] = source1[n];
2211 target[n * tChannels + 2u] = source2[n];
2212 target[n * tChannels + 3u] = source3[n];
2217 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2218 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2219 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
2220 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[3];
2222 const unsigned int blocks = width / tBlockSize;
2223 const unsigned int remaining = width % tBlockSize;
2225 for (
unsigned int y = 0u; y < height; ++y)
2227 for (
unsigned int n = 0u; n < blocks; ++n)
2229 source_8x16x4.val[0] = vld1q_u8(source0);
2230 source_8x16x4.val[1] = vld1q_u8(source1);
2231 source_8x16x4.val[2] = vld1q_u8(source2);
2232 source_8x16x4.val[3] = vld1q_u8(source3);
2234 vst4q_u8(target, source_8x16x4);
2236 source0 += tBlockSize;
2237 source1 += tBlockSize;
2238 source2 += tBlockSize;
2239 source3 += tBlockSize;
2241 target += tBlockSize * tChannels;
2244 for (
unsigned int n = 0u; n < remaining; ++n)
2246 target[n * tChannels + 0u] = source0[n];
2247 target[n * tChannels + 1u] = source1[n];
2248 target[n * tChannels + 2u] = source2[n];
2249 target[n * tChannels + 3u] = source3[n];
2252 source0 += remaining + sourceFrame0PaddingElements;
2253 source1 += remaining + sourceFrame1PaddingElements;
2254 source2 += remaining + sourceFrame2PaddingElements;
2255 source3 += remaining + sourceFrame3PaddingElements;
2256 target += remaining * tChannels + targetFramePaddingElements;
2262inline void FrameChannels::zipChannels<float, uint8_t, 2u>(
const float*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2264 ocean_assert(sourceFrames !=
nullptr);
2265 ocean_assert(targetFrame !=
nullptr);
2267 ocean_assert(width != 0u && height != 0u);
2268 ocean_assert(channels == 2u);
2270 constexpr unsigned int tChannels = 2u;
2272 bool allSourceFramesContinuous =
true;
2274 if (sourceFramesPaddingElements !=
nullptr)
2276 for (
unsigned int n = 0u; n < tChannels; ++n)
2278 if (sourceFramesPaddingElements[n] != 0u)
2280 allSourceFramesContinuous =
false;
2286 const float* source0 = sourceFrames[0];
2287 const float* source1 = sourceFrames[1];
2288 uint8_t* target = targetFrame;
2290 constexpr unsigned int tBlockSize = 16u;
2292 uint8x16x2_t target_8x16x2;
2294 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2296 const unsigned int pixels = width * height;
2297 const unsigned int blocks = pixels / tBlockSize;
2298 const unsigned int remaining = pixels % tBlockSize;
2300 for (
unsigned int n = 0u; n < blocks; ++n)
2305 vst2q_u8(target, target_8x16x2);
2307 source0 += tBlockSize;
2308 source1 += tBlockSize;
2310 target += tBlockSize * tChannels;
2313 for (
unsigned int n = 0u; n < remaining; ++n)
2315 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2316 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2318 target[n * tChannels + 0u] = uint8_t(source0[n]);
2319 target[n * tChannels + 1u] = uint8_t(source1[n]);
2324 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2325 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2327 const unsigned int blocks = width / tBlockSize;
2328 const unsigned int remaining = width % tBlockSize;
2330 for (
unsigned int y = 0u; y < height; ++y)
2332 for (
unsigned int n = 0u; n < blocks; ++n)
2337 vst2q_u8(target, target_8x16x2);
2339 source0 += tBlockSize;
2340 source1 += tBlockSize;
2342 target += tBlockSize * tChannels;
2345 for (
unsigned int n = 0u; n < remaining; ++n)
2347 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2348 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2350 target[n * tChannels + 0u] = uint8_t(source0[n]);
2351 target[n * tChannels + 1u] = uint8_t(source1[n]);
2354 source0 += remaining + sourceFrame0PaddingElements;
2355 source1 += remaining + sourceFrame1PaddingElements;
2356 target += remaining * tChannels + targetFramePaddingElements;
2362inline void FrameChannels::zipChannels<float, uint8_t, 3u>(
const float*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2364 ocean_assert(sourceFrames !=
nullptr);
2365 ocean_assert(targetFrame !=
nullptr);
2367 ocean_assert(width != 0u && height != 0u);
2368 ocean_assert(channels == 3u);
2370 constexpr unsigned int tChannels = 3u;
2372 bool allSourceFramesContinuous =
true;
2374 if (sourceFramesPaddingElements !=
nullptr)
2376 for (
unsigned int n = 0u; n < tChannels; ++n)
2378 if (sourceFramesPaddingElements[n] != 0u)
2380 allSourceFramesContinuous =
false;
2386 const float* source0 = sourceFrames[0];
2387 const float* source1 = sourceFrames[1];
2388 const float* source2 = sourceFrames[2];
2389 uint8_t* target = targetFrame;
2391 constexpr unsigned int tBlockSize = 16u;
2393 uint8x16x3_t target_8x16x3;
2395 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2397 const unsigned int pixels = width * height;
2398 const unsigned int blocks = pixels / tBlockSize;
2399 const unsigned int remaining = pixels % tBlockSize;
2401 for (
unsigned int n = 0u; n < blocks; ++n)
2407 vst3q_u8(target, target_8x16x3);
2409 source0 += tBlockSize;
2410 source1 += tBlockSize;
2411 source2 += tBlockSize;
2413 target += tBlockSize * tChannels;
2416 for (
unsigned int n = 0u; n < remaining; ++n)
2418 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2419 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2420 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2422 target[n * tChannels + 0u] = uint8_t(source0[n]);
2423 target[n * tChannels + 1u] = uint8_t(source1[n]);
2424 target[n * tChannels + 2u] = uint8_t(source2[n]);
2429 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2430 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2431 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
2433 const unsigned int blocks = width / tBlockSize;
2434 const unsigned int remaining = width % tBlockSize;
2436 for (
unsigned int y = 0u; y < height; ++y)
2438 for (
unsigned int n = 0u; n < blocks; ++n)
2445 vst3q_u8(target, target_8x16x3);
2447 source0 += tBlockSize;
2448 source1 += tBlockSize;
2449 source2 += tBlockSize;
2451 target += tBlockSize * tChannels;
2454 for (
unsigned int n = 0u; n < remaining; ++n)
2456 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2457 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2458 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2460 target[n * tChannels + 0u] = uint8_t(source0[n]);
2461 target[n * tChannels + 1u] = uint8_t(source1[n]);
2462 target[n * tChannels + 2u] = uint8_t(source2[n]);
2465 source0 += remaining + sourceFrame0PaddingElements;
2466 source1 += remaining + sourceFrame1PaddingElements;
2467 source2 += remaining + sourceFrame2PaddingElements;
2468 target += remaining * tChannels + targetFramePaddingElements;
2474inline void FrameChannels::zipChannels<float, uint8_t, 4u>(
const float*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2476 ocean_assert(sourceFrames !=
nullptr);
2477 ocean_assert(targetFrame !=
nullptr);
2479 ocean_assert(width != 0u && height != 0u);
2480 ocean_assert(channels == 4u);
2482 constexpr unsigned int tChannels = 4u;
2484 bool allSourceFramesContinuous =
true;
2486 if (sourceFramesPaddingElements !=
nullptr)
2488 for (
unsigned int n = 0u; n < tChannels; ++n)
2490 if (sourceFramesPaddingElements[n] != 0u)
2492 allSourceFramesContinuous =
false;
2498 const float* source0 = sourceFrames[0];
2499 const float* source1 = sourceFrames[1];
2500 const float* source2 = sourceFrames[2];
2501 const float* source3 = sourceFrames[3];
2502 uint8_t* target = targetFrame;
2504 constexpr unsigned int tBlockSize = 16u;
2506 uint8x16x4_t target_8x16x4;
2508 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2510 const unsigned int pixels = width * height;
2511 const unsigned int blocks = pixels / tBlockSize;
2512 const unsigned int remaining = pixels % tBlockSize;
2514 for (
unsigned int n = 0u; n < blocks; ++n)
2521 vst4q_u8(target, target_8x16x4);
2523 source0 += tBlockSize;
2524 source1 += tBlockSize;
2525 source2 += tBlockSize;
2526 source3 += tBlockSize;
2528 target += tBlockSize * tChannels;
2531 for (
unsigned int n = 0u; n < remaining; ++n)
2533 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2534 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2535 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2536 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2538 target[n * tChannels + 0u] = uint8_t(source0[n]);
2539 target[n * tChannels + 1u] = uint8_t(source1[n]);
2540 target[n * tChannels + 2u] = uint8_t(source2[n]);
2541 target[n * tChannels + 3u] = uint8_t(source3[n]);
2546 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2547 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2548 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
2549 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[3];
2551 const unsigned int blocks = width / tBlockSize;
2552 const unsigned int remaining = width % tBlockSize;
2554 for (
unsigned int y = 0u; y < height; ++y)
2556 for (
unsigned int n = 0u; n < blocks; ++n)
2563 vst4q_u8(target, target_8x16x4);
2565 source0 += tBlockSize;
2566 source1 += tBlockSize;
2567 source2 += tBlockSize;
2568 source3 += tBlockSize;
2570 target += tBlockSize * tChannels;
2573 for (
unsigned int n = 0u; n < remaining; ++n)
2575 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2576 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2577 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2578 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2580 target[n * tChannels + 0u] = uint8_t(source0[n]);
2581 target[n * tChannels + 1u] = uint8_t(source1[n]);
2582 target[n * tChannels + 2u] = uint8_t(source2[n]);
2583 target[n * tChannels + 3u] = uint8_t(source3[n]);
2586 source0 += remaining + sourceFrame0PaddingElements;
2587 source1 += remaining + sourceFrame1PaddingElements;
2588 source2 += remaining + sourceFrame2PaddingElements;
2589 source3 += remaining + sourceFrame3PaddingElements;
2590 target += remaining * tChannels + targetFramePaddingElements;
2597template <
typename TSource,
typename TTarget,
unsigned int tChannels>
2598void FrameChannels::zipChannels(
const TSource*
const* sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2600 ocean_assert(sourceFrames !=
nullptr);
2601 ocean_assert(targetFrame !=
nullptr);
2603 ocean_assert(width != 0u && height != 0u);
2609 zipChannelsRuntime<TSource, TTarget>(sourceFrames, targetFrame, width, height, channels, sourceFramesPaddingElements, targetFramePaddingElements);
2613 bool allSourceFramesContinuous =
true;
2615 if (sourceFramesPaddingElements !=
nullptr)
2617 for (
unsigned int n = 0u; n < tChannels; ++n)
2619 if (sourceFramesPaddingElements[n] != 0u)
2621 allSourceFramesContinuous =
false;
2627 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2629 for (
unsigned int n = 0u; n < width * height; ++n)
2631 for (
unsigned int c = 0u; c < tChannels; ++c)
2633 targetFrame[n * tChannels + c] = TTarget(sourceFrames[c][n]);
2639 const unsigned int targetFrameStrideElements = width * tChannels + targetFramePaddingElements;
2641 Indices32 sourceFrameStrideElements(tChannels);
2643 for (
unsigned int c = 0u; c < tChannels; ++c)
2645 if (sourceFramesPaddingElements ==
nullptr)
2647 sourceFrameStrideElements[c] = width;
2651 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
2655 for (
unsigned int y = 0u; y < height; ++y)
2657 TTarget*
const targetRow = targetFrame + y * targetFrameStrideElements;
2659 for (
unsigned int x = 0u; x < width; ++x)
2661 for (
unsigned int c = 0u; c < tChannels; ++c)
2663 *(targetRow + x * tChannels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
2670template <
typename TSource,
typename TTarget>
2671void FrameChannels::zipChannels(
const std::initializer_list<const TSource*>& sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const std::initializer_list<unsigned int>& sourceFramePaddingElements,
const unsigned int targetFramePaddingElements)
2673 ocean_assert(sourceFrames.size() >= 1);
2674 ocean_assert(sourceFramePaddingElements.size() == 0 || sourceFrames.size() == sourceFramePaddingElements.size());
2676 if (sourceFrames.size() == 2)
2678 zipChannels<TSource, TTarget, 2u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2680 else if (sourceFrames.size() == 3)
2682 zipChannels<TSource, TTarget, 3u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2684 else if (sourceFrames.size() == 4)
2686 zipChannels<TSource, TTarget, 4u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2690 zipChannels<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2694template <
typename T,
unsigned int tSourceChannels>
2695inline void FrameChannels::addFirstChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2697 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
2699 ocean_assert(source !=
nullptr && sourceNewChannel !=
nullptr && target !=
nullptr);
2700 ocean_assert(source != target);
2701 ocean_assert(width >= 1u && height >= 1u);
2703 const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
2705 const void* sources[2] = {source, sourceNewChannel};
2707 FrameConverter::convertArbitraryPixelFormat(sources, (
void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, true>, options, worker);
// NOTE(review): the function signature line is not visible in this view; judging by the
// addChannelValueRow<..., true> row function this is presumably addFirstChannelValue() - confirm.
// Adds one channel holding the constant value `newChannelValue` to every pixel of `source` and writes the
// result to `target`.
2710template <
typename T,
unsigned int tSourceChannels>
2713 static_assert(tSourceChannels >= 1u,
"Invalid channel number!");
2715 ocean_assert(source !=
nullptr && target !=
nullptr);
2716 ocean_assert(width >= 1u && height >= 1u);
// the target frame has exactly one channel more than the source frame
2718 const unsigned int targetChannels = tSourceChannels + 1u;
// row strides in elements: payload of one row plus the optional padding at the end of the row
2720 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2721 const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
// the constant channel value is handed to the row function as an untyped pointer
2723 const void* channelValueParameter = (
const void*)(&newChannelValue);
// both frames are continuous only if neither of them has row padding
2725 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// the conversion itself is delegated to the generic converter, using addChannelValueRow as row function
2727 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, true>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
2730template <
typename T,
unsigned int tSourceChannels>
2731inline void FrameChannels::addLastChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2733 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
2735 ocean_assert(source !=
nullptr && sourceNewChannel !=
nullptr && target !=
nullptr);
2736 ocean_assert(source != target);
2737 ocean_assert(width >= 1u && height >= 1u);
2739 const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
2741 const void* sources[2] = {source, sourceNewChannel};
2743 FrameConverter::convertArbitraryPixelFormat(sources, (
void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, false>, options, worker);
// Implementation of FrameChannels::addLastChannelValue().
// Produces a target frame with tSourceChannels + 1 channels in which the additional channel holds
// one constant value. The per-row work is delegated to addChannelValueRow<T, tSourceChannels, false>,
// which is handed to FrameConverter::convertGenericPixelFormat().
// NOTE(review): the `false` template flag presumably selects that the new channel becomes the last
// channel; the flag is defined outside this excerpt - confirm.
// @param source Source frame, must not be nullptr (asserted)
// @param newChannelValue Value for the additional channel
// @param target Target frame, must not be nullptr (asserted)
// @param width Frame width in pixels, at least 1 (asserted)
// @param height Frame height in pixels, at least 1 (asserted)
// @param conversionFlag Conversion flag, forwarded unchanged
// @param sourcePaddingElements Number of padding elements at the end of each source row
// @param targetPaddingElements Number of padding elements at the end of each target row
// @param worker Optional worker, forwarded unchanged
// @tparam T Element type of the frames
// @tparam tSourceChannels Number of channels of the source frame, at least 1 (static_assert)
2746template <
typename T,
unsigned int tSourceChannels>
2747inline void FrameChannels::addLastChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2749 static_assert(tSourceChannels >= 1u,
"Invalid channel number!");
2751 ocean_assert(source !=
nullptr && target !=
nullptr);
2752 ocean_assert(width >= 1u && height >= 1u);
// The target has exactly one channel more than the source.
2754 const unsigned int targetChannels = tSourceChannels + 1u;
// Row strides in elements: payload (width * channels) plus the padding of the row.
2756 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2757 const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
// The constant value travels to the row function as an untyped pointer.
// NOTE(review): this is the address of a by-value parameter, so it is only valid during this call;
// presumably convertGenericPixelFormat() does not keep it afterwards - confirm.
2759 const void* channelValueParameter = (
const void*)(&newChannelValue);
// Both frames are continuous in memory only if neither of them has row padding.
2761 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2763 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, false>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
// NOTE(review): the signature line of this function is not visible in this excerpt; judging from
// the shuffle pattern it is presumably FrameChannels::removeFirstChannel() - confirm.
// Creates a target frame with tSourceChannels - 1 channels by forwarding to shuffleChannels() with
// a compile-time shuffle pattern.
// @tparam T Element type of the frames
// @tparam tSourceChannels Number of source channels, with range [2, 8] (static_assert)
2766template <
typename T,
unsigned int tSourceChannels>
2769 static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u,
"Invalid channel number!");
2771 ocean_assert(source !=
nullptr && target !=
nullptr);
2772 ocean_assert(width >= 1u && height >= 1u);
// One hexadecimal digit per target channel, lowest digit first: 1, 2, 3, ..., 7.
// NOTE(review): presumably digit n names the source channel copied to target channel n, so
// target channel n receives source channel n + 1 - confirm against shuffleChannels().
2774 const unsigned int shufflePatternMax = 0x07654321u;
// Shifting by (8 - tSourceChannels + 1) digits keeps the lowest (tSourceChannels - 1) digits,
// e.g., 0xFF for tSourceChannels == 3 and 0xFFF for tSourceChannels == 4.
2775 const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u);
// Digits of unused target channels become 0.
2777 const unsigned int shufflePattern = shufflePatternMax & mask;
2779 FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
// NOTE(review): the signature line of this function is not visible in this excerpt; judging from
// the shuffle pattern it is presumably FrameChannels::removeLastChannel() - confirm.
// Creates a target frame with tSourceChannels - 1 channels by forwarding to shuffleChannels() with
// a compile-time shuffle pattern.
// @tparam T Element type of the frames
// @tparam tSourceChannels Number of source channels, with range [2, 8] (static_assert)
2782template <
typename T,
unsigned int tSourceChannels>
2785 static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u,
"Invalid channel number!");
2787 ocean_assert(source !=
nullptr && target !=
nullptr);
2788 ocean_assert(width >= 1u && height >= 1u);
// One hexadecimal digit per target channel, lowest digit first: 0, 1, 2, ..., 7.
// NOTE(review): presumably digit n names the source channel copied to target channel n, so
// target channel n receives source channel n - confirm against shuffleChannels().
2790 const unsigned int shufflePatternMax = 0x76543210u;
// Shifting by (8 - tSourceChannels + 1) digits keeps the lowest (tSourceChannels - 1) digits,
// e.g., 0xFF for tSourceChannels == 3 and 0xFFF for tSourceChannels == 4.
2791 const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u);
// Digits of unused target channels become 0.
2793 const unsigned int shufflePattern = shufflePatternMax & mask;
2795 FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
// Implementation of FrameChannels::copyChannel().
// Forwards to FrameConverter::convertGenericPixelFormat() with
// copyChannelRow<T, tSourceChannels, tTargetChannels, tSourceChannelIndex, tTargetChannelIndex> as
// row function; the conversion flag is fixed to CONVERT_NORMAL.
// @param source Source frame, must not be nullptr (asserted)
// @param target Target frame, must not be nullptr (asserted)
// @param width Frame width in pixels, at least 1 (asserted)
// @param height Frame height in pixels, at least 1 (asserted)
// @param sourcePaddingElements Number of padding elements at the end of each source row
// @param targetPaddingElements Number of padding elements at the end of each target row
// @param worker Optional worker, forwarded unchanged
// @tparam T Element type of the frames
// @tparam tSourceChannels Number of source channels, at least 1 (static_assert)
// @tparam tTargetChannels Number of target channels, at least 1 (static_assert)
// @tparam tSourceChannelIndex Index of the source channel, must be below tSourceChannels (static_assert)
// @tparam tTargetChannelIndex Index of the target channel, must be below tTargetChannels (static_assert)
2798template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
2799inline void FrameChannels::copyChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2801 static_assert(tSourceChannels >= 1u,
"Invalid number of channels!");
2802 static_assert(tTargetChannels >= 1u,
"Invalid number of channels!");
2804 static_assert(tSourceChannelIndex < tSourceChannels,
"Invalid channel index!");
2805 static_assert(tTargetChannelIndex < tTargetChannels,
"Invalid channel index!");
2807 ocean_assert(source !=
nullptr && target !=
nullptr);
2808 ocean_assert(width >= 1u && height >= 1u);
// Row strides in elements: payload (width * channels) plus the padding of the row.
2810 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2811 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
// Both frames are continuous in memory only if neither of them has row padding.
2815 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// NOTE(review): reversePixelOrderRowInPlaceFunction is declared on a line that is not visible in
// this excerpt; with CONVERT_NORMAL it is presumably unused (possibly nullptr) - confirm.
2817 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements,
CONVERT_NORMAL, FrameChannels::copyChannelRow<T, tSourceChannels, tTargetChannels, tSourceChannelIndex, tTargetChannelIndex>, reversePixelOrderRowInPlaceFunction, areContinuous,
nullptr, worker);
// Implementation of FrameChannels::setChannel().
// Delegates to setChannelSubset<T, tChannel, tChannels>(), either through the worker or directly for
// the rows [0, height).
// NOTE(review): the condition that selects between the two calls below is not visible in this
// excerpt; since worker is dereferenced, the first call must only run with a valid worker - confirm.
// @param frame Frame to modify, must not be nullptr (asserted)
// @param width Frame width in pixels, at least 1 (asserted)
// @param height Frame height in pixels, at least 1 (asserted)
// @param value Value handed to setChannelSubset()
// @param framePaddingElements Padding value handed to setChannelSubset()
// @param worker Worker used for the first call below
// @tparam T Element type of the frame
// @tparam tChannel Index of the channel, must be below tChannels (static_assert)
// @tparam tChannels Number of channels of the frame, at least 1 (static_assert)
2820template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
2821inline void FrameChannels::setChannel(T* frame,
const unsigned int width,
const unsigned int height,
const T value,
const unsigned int framePaddingElements,
Worker* worker)
2823 static_assert(tChannels >= 1u,
"Invalid channel number!");
2824 static_assert(tChannel < tChannels,
"Invalid channel index!");
2826 ocean_assert(frame !=
nullptr);
2827 ocean_assert(width >= 1u && height >= 1u);
// Worker path: the trailing 0u, 0u inside createStatic() are presumably placeholders for the first
// row and the number of rows, which executeFunction() replaces per invocation within [0, height) - confirm.
2831 worker->
executeFunction(
Worker::Function::createStatic(&setChannelSubset<T, tChannel, tChannels>, frame, width, value, framePaddingElements, 0u, 0u), 0u, height);
// Direct path: one call covering all rows, starting with row 0 and handling height rows.
2835 setChannelSubset<T, tChannel, tChannels>(frame, width, value, framePaddingElements, 0u, height);
// NOTE(review): the signature line of this function is not visible in this excerpt; judging from
// the row function it forwards, it is presumably FrameChannels::reverseChannelOrder() - confirm.
// Forwards to FrameConverter::convertGenericPixelFormat() with reverseRowChannelOrder<T, tChannels>
// as row function and reverseRowPixelOrderInPlace<T, tChannels> as in-place pixel-order function.
// Source and target have the same number of channels.
// @tparam T Element type of the frames
// @tparam tChannels Number of channels of both frames, at least 1 (static_assert)
2839template <
typename T,
unsigned int tChannels>
2842 static_assert(tChannels >= 1u,
"Invalid channel number!");
2844 ocean_assert(source !=
nullptr && target !=
nullptr);
2845 ocean_assert(width >= 1u && height >= 1u);
// Row strides in elements: payload (width * channels) plus the padding of the row.
2847 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
2848 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
// Always reported as not continuous, independent of the actual padding values.
// NOTE(review): the reason is not stated in the visible code; presumably rows must always be
// handled one by one here - confirm.
2850 constexpr bool areContinuous =
false;
2852 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::reverseRowChannelOrder<T, tChannels>, FrameChannels::reverseRowPixelOrderInPlace<T, tChannels>, areContinuous,
nullptr, worker);
// NOTE(review): the signature line of this function is not visible in this excerpt; the body reads
// source, target and size, so it is presumably FrameChannels::reverseRowPixelOrder() - confirm.
// Copies size pixels from source to target in reversed pixel order; the channel order inside each
// pixel is kept. The source is read front to back while the target is written back to front.
// NOTE(review): the switch/case lines separating the NEON loops below are not visible; the channel
// count named for each loop is inferred from the number of bytes it loads.
// @tparam T Element type of the row
// @tparam tChannels Number of channels per pixel, at least 1 (static_assert)
2855template <
typename T,
unsigned int tChannels>
2858 static_assert(tChannels >= 1u,
"Invalid channel number!");
2860 ocean_assert(source !=
nullptr && target !=
nullptr);
2861 ocean_assert(size >= 1);
// Bounds of both rows, used only by the asserts below.
2864 const T*
const debugSourceStart = source;
2865 const T*
const debugSourceEnd = debugSourceStart + size * tChannels;
2867 const T*
const debugTargetStart = target;
2868 const T*
const debugTargetEnd = debugTargetStart + size * tChannels;
// The target pointer starts one element behind the end of the target row and moves backwards.
2872 target += size * tChannels;
2874 const T*
const sourceEnd = source + size * tChannels;
2876#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// The NEON paths are compiled only for element types that TypeMapper maps to uint8_t.
2878 if constexpr (std::is_same<typename TypeMapper<T>::Type, uint8_t>::value)
// Every NEON iteration handles 16 pixels; remaining pixels are left to the scalar loop at the end.
2880 const size_t blocks16 = size /
size_t(16);
// Loop loading 16 bytes per 16 pixels (1 channel).
2886 for (
size_t n = 0; n < blocks16; ++n)
2888 target -= 16u * tChannels;
2890 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2891 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2893 const uint8x16_t source_u_8x16 = vld1q_u8((
const uint8_t*)(source));
// vrev64q_u8 reverses the bytes inside each 64-bit half; swapping the halves completes the reversal.
2894 uint8x16_t revSource_u_8x16 = vrev64q_u8(source_u_8x16);
2895 revSource_u_8x16 = vcombine_u8(vget_high_u8(revSource_u_8x16), vget_low_u8(revSource_u_8x16));
2897 vst1q_u8((uint8_t*)(target), revSource_u_8x16);
2899 source += 16u * tChannels;
// Loop loading 32 bytes per 16 pixels (2 channels).
2907 for (
size_t n = 0; n < blocks16; ++n)
2909 target -= 16u * tChannels;
2911 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2912 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2914 const uint8x16_t sourceA_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 0);
2915 const uint8x16_t sourceB_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 16);
// Pixels are treated as 16-bit lanes, so the two bytes of a pixel stay together.
2917 const uint8x16_t revSourceA_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u8(sourceA_u_8x16)));
2918 const uint8x16_t revSourceB_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u8(sourceB_u_8x16)));
2920 const uint8x16_t targetA_u_8x16 = vcombine_u8(vget_high_u8(revSourceA_u_8x16), vget_low_u8(revSourceA_u_8x16));
2921 const uint8x16_t targetB_u_8x16 = vcombine_u8(vget_high_u8(revSourceB_u_8x16), vget_low_u8(revSourceB_u_8x16));
// The two registers are stored in swapped order (B first, then A).
2923 vst1q_u8((uint8_t*)(target) + 0, targetB_u_8x16);
2924 vst1q_u8((uint8_t*)(target) + 16, targetA_u_8x16);
2926 source += 16u * tChannels;
// Loop using the de-interleaving 3-register load/store (3 channels).
2934 for (
size_t n = 0; n < blocks16; ++n)
2936 target -= 16u * tChannels;
2938 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2939 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2941 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)(source));
// Each channel register is reversed on its own: both halves reversed and exchanged.
2943 uint8x16x3_t revSource_u_8x16x3;
2944 revSource_u_8x16x3.val[0] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[0])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[0])));
2945 revSource_u_8x16x3.val[1] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[1])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[1])));
2946 revSource_u_8x16x3.val[2] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[2])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[2])));
2948 vst3q_u8((uint8_t*)(target), revSource_u_8x16x3);
2950 source += 16u * tChannels;
// Loop loading 64 bytes per 16 pixels (4 channels).
2958 for (
size_t n = 0; n < blocks16; ++n)
2960 target -= 16u * tChannels;
2962 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2963 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2965 const uint8x16_t sourceA_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 0);
2966 const uint8x16_t sourceB_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 16);
2967 const uint8x16_t sourceC_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 32);
2968 const uint8x16_t sourceD_u_8x16 = vld1q_u8((
const uint8_t*)(source) + 48);
// Pixels are treated as 32-bit lanes, so the four bytes of a pixel stay together.
2970 const uint8x16_t revSourceA_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceA_u_8x16)));
2971 const uint8x16_t revSourceB_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceB_u_8x16)));
2972 const uint8x16_t revSourceC_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceC_u_8x16)));
2973 const uint8x16_t revSourceD_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceD_u_8x16)));
2975 const uint8x16_t targetA_u_8x16 = vcombine_u8(vget_high_u8(revSourceA_u_8x16), vget_low_u8(revSourceA_u_8x16));
2976 const uint8x16_t targetB_u_8x16 = vcombine_u8(vget_high_u8(revSourceB_u_8x16), vget_low_u8(revSourceB_u_8x16));
2977 const uint8x16_t targetC_u_8x16 = vcombine_u8(vget_high_u8(revSourceC_u_8x16), vget_low_u8(revSourceC_u_8x16));
2978 const uint8x16_t targetD_u_8x16 = vcombine_u8(vget_high_u8(revSourceD_u_8x16), vget_low_u8(revSourceD_u_8x16));
// The four registers are stored in reversed order (D, C, B, A).
2980 vst1q_u8((uint8_t*)(target) + 0, targetD_u_8x16);
2981 vst1q_u8((uint8_t*)(target) + 16, targetC_u_8x16);
2982 vst1q_u8((uint8_t*)(target) + 32, targetB_u_8x16);
2983 vst1q_u8((uint8_t*)(target) + 48, targetA_u_8x16);
2985 source += 16u * tChannels;
// Scalar loop handling all pixels not consumed by a NEON loop, one pixel per iteration.
2998 while (source != sourceEnd)
3000 ocean_assert(source < sourceEnd);
// The channels of the pixel are written last-to-first through a decreasing target pointer, so the
// channel order inside the pixel stays unchanged in memory.
3002 for (
unsigned int n = 0u; n < tChannels; ++n)
3004 ocean_assert(source + tChannels - n - 1u >= debugSourceStart);
3005 ocean_assert(source + tChannels - n - 1u < debugSourceEnd);
3007 ocean_assert(target > debugTargetStart && target <= debugTargetEnd);
3009 *--target = source[tChannels - n - 1u];
3012 source += tChannels;
// NOTE(review): the signature line of this function is not visible in this excerpt; the body reads
// data and size and swaps pixels inside one buffer, so it is presumably
// FrameChannels::reverseRowPixelOrderInPlace() - confirm.
// Reverses the pixel order of one row in place by swapping pixels from both ends towards the center.
// NOTE(review): the declarations of n and PixelType, the switch/case lines between the NEON loops,
// and the increment of n in the final loop are on lines that are not visible in this excerpt.
// @tparam T Element type of the row
// @tparam tChannels Number of channels per pixel, at least 1 (static_assert)
3016template <
typename T,
unsigned int tChannels>
3019 static_assert(tChannels >= 1u,
"Invalid channel number!");
3021 ocean_assert(data !=
nullptr);
3022 ocean_assert(size >= 1);
3028#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// The NEON paths are compiled only for element types that TypeMapper maps to uint8_t.
3030 if constexpr (std::is_same<typename TypeMapper<T>::Type, uint8_t>::value)
// Every NEON iteration swaps 16 pixels from the left with 16 pixels from the right (32 pixels).
3034 const size_t blocks32 = size /
size_t(32);
// left points to the first pixel, right to the 16th pixel before the end of the row.
// NOTE(review): (size - 16u) wraps around for size < 16; presumably a guard on a line not visible
// here ensures a sufficiently large size before this point - confirm.
3036 uint8_t* left = (uint8_t*)(data);
3037 uint8_t* right = (uint8_t*)(data) + (size - 16u) * tChannels;
// Loop using plain 16-byte loads for 16 pixels (1 channel).
3043 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3045 const uint8x16_t left_u_8x16 = vld1q_u8(left);
3046 const uint8x16_t right_u_8x16 = vld1q_u8(right);
// vrev64q_u8 reverses the bytes inside each 64-bit half; swapping the halves completes the reversal.
3048 uint8x16_t revLeft_u_8x16 = vrev64q_u8(left_u_8x16);
3049 revLeft_u_8x16 = vcombine_u8(vget_high_u8(revLeft_u_8x16), vget_low_u8(revLeft_u_8x16));
3051 uint8x16_t revRight_u_8x16 = vrev64q_u8(right_u_8x16);
3052 revRight_u_8x16 = vcombine_u8(vget_high_u8(revRight_u_8x16), vget_low_u8(revRight_u_8x16));
// The reversed blocks are written to the opposite side.
3054 vst1q_u8(left, revRight_u_8x16);
3055 vst1q_u8(right, revLeft_u_8x16);
3057 left += 16u * tChannels;
3058 right -= 16u * tChannels;
// The first blocks32 * 16 pixels (and their counterparts at the end) are done; the scalar loop
// below continues with pixel index n.
3061 n += blocks32 * 16u;
// Loop using the de-interleaving 2-register load/store (2 channels).
3068 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3070 const uint8x16x2_t left_u_8x16x2 = vld2q_u8(left);
3071 const uint8x16x2_t right_u_8x16x2 = vld2q_u8(right);
// Each channel register is reversed individually.
3073 uint8x16x2_t revLeft_u_8x16x2;
3074 revLeft_u_8x16x2.val[0] = vrev64q_u8(left_u_8x16x2.val[0]);
3075 revLeft_u_8x16x2.val[1] = vrev64q_u8(left_u_8x16x2.val[1]);
3076 revLeft_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[0]), vget_low_u8(revLeft_u_8x16x2.val[0]));
3077 revLeft_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[1]), vget_low_u8(revLeft_u_8x16x2.val[1]));
3079 uint8x16x2_t revRight_u_8x16x2;
3080 revRight_u_8x16x2.val[0] = vrev64q_u8(right_u_8x16x2.val[0]);
3081 revRight_u_8x16x2.val[1] = vrev64q_u8(right_u_8x16x2.val[1]);
3082 revRight_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[0]), vget_low_u8(revRight_u_8x16x2.val[0]));
3083 revRight_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[1]), vget_low_u8(revRight_u_8x16x2.val[1]));
3085 vst2q_u8(left, revRight_u_8x16x2);
3086 vst2q_u8(right, revLeft_u_8x16x2);
3088 left += 16u * tChannels;
3089 right -= 16u * tChannels;
3092 n += blocks32 * 16u;
// Loop using the de-interleaving 3-register load/store (3 channels).
3099 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3101 const uint8x16x3_t left_u_8x16x3 = vld3q_u8(left);
3102 const uint8x16x3_t right_u_8x16x3 = vld3q_u8(right);
3104 uint8x16x3_t revLeft_u_8x16x3;
3105 revLeft_u_8x16x3.val[0] = vrev64q_u8(left_u_8x16x3.val[0]);
3106 revLeft_u_8x16x3.val[1] = vrev64q_u8(left_u_8x16x3.val[1]);
3107 revLeft_u_8x16x3.val[2] = vrev64q_u8(left_u_8x16x3.val[2]);
3108 revLeft_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[0]), vget_low_u8(revLeft_u_8x16x3.val[0]));
3109 revLeft_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[1]), vget_low_u8(revLeft_u_8x16x3.val[1]));
3110 revLeft_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[2]), vget_low_u8(revLeft_u_8x16x3.val[2]));
3112 uint8x16x3_t revRight_u_8x16x3;
3113 revRight_u_8x16x3.val[0] = vrev64q_u8(right_u_8x16x3.val[0]);
3114 revRight_u_8x16x3.val[1] = vrev64q_u8(right_u_8x16x3.val[1]);
3115 revRight_u_8x16x3.val[2] = vrev64q_u8(right_u_8x16x3.val[2]);
3116 revRight_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[0]), vget_low_u8(revRight_u_8x16x3.val[0]));
3117 revRight_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[1]), vget_low_u8(revRight_u_8x16x3.val[1]));
3118 revRight_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[2]), vget_low_u8(revRight_u_8x16x3.val[2]));
3120 vst3q_u8(left, revRight_u_8x16x3);
3121 vst3q_u8(right, revLeft_u_8x16x3);
3123 left += 16u * tChannels;
3124 right -= 16u * tChannels;
3127 n += blocks32 * 16u;
// Loop using the de-interleaving 4-register load/store (4 channels).
3134 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3136 const uint8x16x4_t left_u_8x16x4 = vld4q_u8(left);
3137 const uint8x16x4_t right_u_8x16x4 = vld4q_u8(right);
3139 uint8x16x4_t revLeft_u_8x16x4;
3140 revLeft_u_8x16x4.val[0] = vrev64q_u8(left_u_8x16x4.val[0]);
3141 revLeft_u_8x16x4.val[1] = vrev64q_u8(left_u_8x16x4.val[1]);
3142 revLeft_u_8x16x4.val[2] = vrev64q_u8(left_u_8x16x4.val[2]);
3143 revLeft_u_8x16x4.val[3] = vrev64q_u8(left_u_8x16x4.val[3]);
3144 revLeft_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[0]), vget_low_u8(revLeft_u_8x16x4.val[0]));
3145 revLeft_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[1]), vget_low_u8(revLeft_u_8x16x4.val[1]));
3146 revLeft_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[2]), vget_low_u8(revLeft_u_8x16x4.val[2]));
3147 revLeft_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[3]), vget_low_u8(revLeft_u_8x16x4.val[3]));
3149 uint8x16x4_t revRight_u_8x16x4;
3150 revRight_u_8x16x4.val[0] = vrev64q_u8(right_u_8x16x4.val[0]);
3151 revRight_u_8x16x4.val[1] = vrev64q_u8(right_u_8x16x4.val[1]);
3152 revRight_u_8x16x4.val[2] = vrev64q_u8(right_u_8x16x4.val[2]);
3153 revRight_u_8x16x4.val[3] = vrev64q_u8(right_u_8x16x4.val[3]);
3154 revRight_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[0]), vget_low_u8(revRight_u_8x16x4.val[0]));
3155 revRight_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[1]), vget_low_u8(revRight_u_8x16x4.val[1]));
3156 revRight_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[2]), vget_low_u8(revRight_u_8x16x4.val[2]));
3157 revRight_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[3]), vget_low_u8(revRight_u_8x16x4.val[3]));
3159 vst4q_u8(left, revRight_u_8x16x4);
3160 vst4q_u8(right, revLeft_u_8x16x4);
3162 left += 16u * tChannels;
3163 right -= 16u * tChannels;
3166 n += blocks32 * 16u;
// Scalar part: the row is addressed as an array of whole pixels (PixelType) and pixel n is swapped
// with its mirror pixel (size - n - 1) through a temporary, for n below size / 2.
3179 PixelType intermediate;
3181 PixelType*
const pixels = (PixelType*)(data);
3183 while (n < size / 2)
3185 intermediate = pixels[n];
3187 pixels[n] = pixels[size - n - 1];
3188 pixels[size - n - 1] = intermediate;
// NOTE(review): the signature line of this function is not visible in this excerpt; the body reads
// source, target and size and reverses the channels of every pixel, so it is presumably
// FrameChannels::reverseRowChannelOrder() - confirm.
// Copies size pixels from source to target while reversing the channel order inside each pixel;
// the pixel order of the row is kept.
// NOTE(review): the switch/case lines between the SIMD loops and the bodies of the SSE loops are on
// lines that are not visible in this excerpt.
// @tparam T Element type of the row
// @tparam tChannels Number of channels per pixel
3194template <
typename T,
unsigned int tChannels>
3197 ocean_assert(source !=
nullptr && target !=
nullptr);
3198 ocean_assert(source != target);
3199 ocean_assert(size >= 1);
// Bounds of both rows, used only by the asserts below.
3202 const T*
const debugSourceStart = source;
3203 const T*
const debugSourceEnd = debugSourceStart + size * tChannels;
3205 const T*
const debugTargetStart = target;
3206 const T*
const debugTargetEnd = debugTargetStart + size * tChannels;
// With a single channel there is nothing to reverse, the row is copied as is.
3209 if constexpr (tChannels == 1)
3213 memcpy(target, source,
sizeof(T) * size);
3217 const T*
const sourceEnd = source + size * tChannels;
3219#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// Every SSE iteration advances both pointers by 16 pixels.
3223 const size_t blocks16 = size /
size_t(16);
// Presumably the single-channel branch of a switch, which cannot be reached because of the memcpy
// path above - confirm.
3228 ocean_assert(
false &&
"This should have been handled above!");
3233 for (
size_t n = 0; n < blocks16; ++n)
3237 source += 16u * tChannels;
3238 target += 16u * tChannels;
3246 for (
size_t n = 0; n < blocks16; ++n)
3250 source += 16u * tChannels;
3251 target += 16u * tChannels;
3259 for (
size_t n = 0; n < blocks16; ++n)
3263 source += 16u * tChannels;
3264 target += 16u * tChannels;
3275#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// Every NEON iteration handles 16 pixels; remaining pixels are left to the scalar loop at the end.
3279 const size_t blocks16 = size /
size_t(16);
// Presumably the single-channel branch of a switch, which cannot be reached because of the memcpy
// path above - confirm.
3284 ocean_assert(
false &&
"This should have been handled above!");
// Loop loading 32 bytes per 16 pixels (2 channels): vrev16q_u8 swaps the two bytes of every 16-bit lane.
3289 for (
size_t n = 0; n < blocks16; ++n)
3291 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3292 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3294 const uint8x16_t sourceA_u_8x16 = vld1q_u8((
const uint8_t*)source + 0);
3295 const uint8x16_t sourceB_u_8x16 = vld1q_u8((
const uint8_t*)source + 16);
3297 const uint8x16_t revSourceA_u_8x16 = vrev16q_u8(sourceA_u_8x16);
3298 const uint8x16_t revSourceB_u_8x16 = vrev16q_u8(sourceB_u_8x16);
3300 vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3301 vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3303 source += 16u * tChannels;
3304 target += 16u * tChannels;
// Loop using the de-interleaving 3-register load/store (3 channels): the first and the third channel
// register are exchanged, the middle one stays.
3312 for (
size_t n = 0; n < blocks16; ++n)
3314 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3315 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3317 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3319 uint8x16x3_t revSource_u_8x16x3;
3320 revSource_u_8x16x3.val[0] = source_u_8x16x3.val[2];
3321 revSource_u_8x16x3.val[1] = source_u_8x16x3.val[1];
3322 revSource_u_8x16x3.val[2] = source_u_8x16x3.val[0];
3324 vst3q_u8((uint8_t*)target, revSource_u_8x16x3);
3326 source += 16u * tChannels;
3327 target += 16u * tChannels;
// Loop loading 64 bytes per 16 pixels (4 channels): vrev32q_u8 reverses the four bytes of every 32-bit lane.
3335 for (
size_t n = 0; n < blocks16; ++n)
3337 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3338 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3340 const uint8x16_t sourceA_u_8x16 = vld1q_u8((
const uint8_t*)source + 0);
3341 const uint8x16_t sourceB_u_8x16 = vld1q_u8((
const uint8_t*)source + 16);
3342 const uint8x16_t sourceC_u_8x16 = vld1q_u8((
const uint8_t*)source + 32);
3343 const uint8x16_t sourceD_u_8x16 = vld1q_u8((
const uint8_t*)source + 48);
3345 const uint8x16_t revSourceA_u_8x16 = vrev32q_u8(sourceA_u_8x16);
3346 const uint8x16_t revSourceB_u_8x16 = vrev32q_u8(sourceB_u_8x16);
3347 const uint8x16_t revSourceC_u_8x16 = vrev32q_u8(sourceC_u_8x16);
3348 const uint8x16_t revSourceD_u_8x16 = vrev32q_u8(sourceD_u_8x16);
3350 vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3351 vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3352 vst1q_u8((uint8_t*)target + 32, revSourceC_u_8x16);
3353 vst1q_u8((uint8_t*)target + 48, revSourceD_u_8x16);
3355 source += 16u * tChannels;
3356 target += 16u * tChannels;
// Scalar loop handling all pixels not consumed by a SIMD loop, one pixel per iteration.
3369 while (source != sourceEnd)
3371 ocean_assert(source < sourceEnd);
3373 ocean_assert(source >= debugSourceStart && source + tChannels <= debugSourceEnd);
3374 ocean_assert(target >= debugTargetStart && target + tChannels <= debugTargetEnd);
// Target channel n receives the source channel mirrored within the pixel.
3376 for (
unsigned int n = 0u; n < tChannels; ++n)
3378 target[n] = source[tChannels - n - 1u];
3381 source += tChannels;
3382 target += tChannels;
// NOTE(review): the signature line of this function is not visible in this excerpt; the body reads
// source, target and size and applies tShufflePattern per pixel, so it is presumably
// FrameChannels::shuffleRowChannels() - confirm.
// Converts one row of size pixels from tSourceChannels to tTargetChannels channels. The pattern
// holds one hexadecimal digit per target channel, lowest digit first; digit n is the index of the
// source channel that is copied to target channel n (see the scalar loop at the end).
// NOTE(review): several structural lines (braces, break statements, the bodies of the SSE loop) are
// not visible in this excerpt.
// @tparam T Element type of the row
// @tparam tSourceChannels Number of source channels, with range [1, 8] (static_assert)
// @tparam tTargetChannels Number of target channels, with range [1, 8] (static_assert)
// @tparam tShufflePattern Shuffle pattern, every digit must be below tSourceChannels (static_assert)
3386template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
3389 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
3390 static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u,
"Invalid channel number!");
// The combination of one source channel and one target channel is rejected.
3392 static_assert(tSourceChannels != 1u || tTargetChannels != 1u,
"Invalid channel number!");
// All eight digits of the pattern are verified, including those of unused target channels.
3394 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
3395 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
3396 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
3397 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
3398 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
3399 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
3400 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
3401 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
3403 ocean_assert(source !=
nullptr && target !=
nullptr);
3404 ocean_assert(size != 0);
3406 const T*
const sourceEnd = source + size * tSourceChannels;
3408#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// Every SSE iteration advances both pointers by 16 pixels.
3412 const size_t blocks16 = size /
size_t(16);
// The switch key stores the source channel count in the low digit and the target channel count in
// the next digit.
3414 switch (tSourceChannels | ((tTargetChannels) << 4u))
// 4 source channels to 4 target channels.
3417 case (4u | (4u << 4u)):
// Byte offsets of the second, third and fourth pixel inside a 16-byte register.
3421 constexpr unsigned int offset1 = 0x04040404u;
3422 constexpr unsigned int offset2 = 0x08080808u;
3423 constexpr unsigned int offset3 = 0x0C0C0C0Cu;
// Spreads the four lowest pattern digits so that every digit occupies its own byte; the resulting
// 32-bit value holds the byte indices for the first pixel.
3426 const unsigned int shufflePattern0 = ((tShufflePattern & 0xF000u) << 12u) | ((tShufflePattern & 0x0F00u) << 8u) | ((tShufflePattern & 0x00F0u) << 4u) | ((tShufflePattern & 0x000Fu) << 0u);
// The same indices shifted to the following three pixels.
3428 const unsigned int shufflePattern1 = shufflePattern0 + offset1;
3429 const unsigned int shufflePattern2 = shufflePattern0 + offset2;
3430 const unsigned int shufflePattern3 = shufflePattern0 + offset3;
// Combines the four 32-bit index groups into one 128-bit value.
// NOTE(review): presumably SSE::set128i() takes the upper 64 bits first and the lower 64 bits
// second - confirm.
3432 const __m128i shufflePattern128 =
SSE::set128i((((
unsigned long long)shufflePattern3) << 32ull) | (
unsigned long long)shufflePattern2, (((
unsigned long long)shufflePattern1) << 32ull) | (
unsigned long long)shufflePattern0);
3434 for (
size_t n = 0; n < blocks16; ++n)
3441 source += 16u * tSourceChannels;
3442 target += 16u * tTargetChannels;
3454#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// Every NEON iteration handles 16 pixels; remaining pixels are left to the scalar loop at the end.
3458 const size_t blocks16 = size /
size_t(16);
// The switch key stores the source channel count in the low digit and the target channel count in
// the next digit.
3460 switch (tSourceChannels | ((tTargetChannels) << 4u))
// 1 source channel to 3 target channels: the single source register is stored to all three channels.
3463 case (1u | (3u << 4u)):
// NOTE(review): the message below reads patter, presumably pattern is meant.
3465 static_assert(tSourceChannels != 1u || tShufflePattern == 0u,
"Invalid shuffle patter!");
3467 for (
size_t n = 0; n < blocks16; ++n)
3469 const uint8x16_t source_u_8x16 = vld1q_u8((
const uint8_t*)source);
3471 uint8x16x3_t target_u_8x16x3;
3473 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3475 target_u_8x16x3.val[nT] = source_u_8x16;
3478 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3480 source += 16u * tSourceChannels;
3481 target += 16u * tTargetChannels;
// 2 source channels to 1 target channel.
3488 case (2u | (1u << 4u)):
3490 for (
size_t n = 0; n < blocks16; ++n)
3492 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((
const uint8_t*)source);
// Only the lowest bit of the first digit is used as index, so the index is 0 or 1; the runtime
// assert checks that the full digit has the same value.
3494 constexpr unsigned int sourceChannel = tShufflePattern & 0x00000001u;
3495 static_assert(sourceChannel <= 1u,
"Invalid shuffle pattern!");
3496 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3498 const uint8x16_t target_u_8x16 = source_u_8x16x2.val[sourceChannel];
3500 vst1q_u8((uint8_t*)target, target_u_8x16);
3502 source += 16u * tSourceChannels;
3503 target += 16u * tTargetChannels;
// 2 source channels to 3 target channels.
3510 case (2u | (3u << 4u)):
3512 for (
size_t n = 0; n < blocks16; ++n)
3514 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((
const uint8_t*)source);
3516 uint8x16x3_t target_u_8x16x3;
3518 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
// The index is reduced to one bit (0 or 1); the assert checks that the full digit agrees.
3520 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3522 target_u_8x16x3.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u];
3525 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3527 source += 16u * tSourceChannels;
3528 target += 16u * tTargetChannels;
// 2 source channels to 4 target channels.
3535 case (2u | (4u << 4u)):
3537 for (
size_t n = 0; n < blocks16; ++n)
3539 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((
const uint8_t*)source);
3541 uint8x16x4_t target_u_8x16x4;
3543 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
// The index is reduced to one bit (0 or 1); the assert checks that the full digit agrees.
3545 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3547 target_u_8x16x4.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u];
3550 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3552 source += 16u * tSourceChannels;
3553 target += 16u * tTargetChannels;
// 3 source channels to 1 target channel.
3560 case (3u | (1u << 4u)):
// The index is limited to 2, the largest valid index of a 3-register value; the assert checks that
// the limit did not change the digit.
3562 constexpr unsigned int sourceChannel = (tShufflePattern & 0x0000000Fu) <= 2u ? (tShufflePattern & 0x0000000Fu) : 2u;
3563 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3565 for (
size_t n = 0; n < blocks16; ++n)
3567 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3569 const uint8x16_t target_u_8x16 = source_u_8x16x3.val[sourceChannel];
3571 vst1q_u8((uint8_t*)target, target_u_8x16);
3573 source += 16u * tSourceChannels;
3574 target += 16u * tTargetChannels;
// 3 source channels to 2 target channels.
3581 case (3u | (2u << 4u)):
3583 for (
size_t n = 0; n < blocks16; ++n)
3585 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3587 uint8x16x2_t target_u_8x16x2;
3589 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
// std::min limits the index to 2, the largest valid index of a 3-register value.
3591 target_u_8x16x2.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)];
3594 vst2q_u8((uint8_t*)target, target_u_8x16x2);
3596 source += 16u * tSourceChannels;
3597 target += 16u * tTargetChannels;
// 3 source channels to 3 target channels.
3604 case (3u | (3u << 4u)):
3606 for (
size_t n = 0; n < blocks16; ++n)
3608 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3610 uint8x16x3_t target_u_8x16x3;
3612 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
// std::min limits the index to 2, the largest valid index of a 3-register value.
3614 target_u_8x16x3.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)];
3617 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3619 source += 16u * tSourceChannels;
3620 target += 16u * tTargetChannels;
// 4 source channels to 1 target channel.
3627 case (4u | (1u << 4u)):
3629 for (
size_t n = 0; n < blocks16; ++n)
3631 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
// Only the two lowest bits of the first digit are used as index, so the index is within [0, 3];
// the runtime assert checks that the full digit has the same value.
3633 constexpr unsigned int sourceChannel = tShufflePattern & 0x00000003u;
3634 static_assert(sourceChannel <= 3u,
"Invalid shuffle pattern!");
3636 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3638 const uint8x16_t target_u_8x16 = source_u_8x16x4.val[sourceChannel];
3640 vst1q_u8((uint8_t*)target, target_u_8x16);
3642 source += 16u * tSourceChannels;
3643 target += 16u * tTargetChannels;
// 4 source channels to 2 target channels.
3650 case (4u | (2u << 4u)):
3652 for (
size_t n = 0; n < blocks16; ++n)
3654 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3656 uint8x16x2_t target_u_8x16x2;
3658 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
// The index is reduced to two bits (0 to 3); the assert checks that the full digit agrees.
3660 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3662 target_u_8x16x2.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u];
3665 vst2q_u8((uint8_t*)target, target_u_8x16x2);
3667 source += 16u * tSourceChannels;
3668 target += 16u * tTargetChannels;
// 4 source channels to 3 target channels.
3675 case (4u | (3u << 4u)):
3677 for (
size_t n = 0; n < blocks16; ++n)
3679 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3681 uint8x16x3_t target_u_8x16x3;
3683 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
// The index is reduced to two bits (0 to 3); the assert checks that the full digit agrees.
3685 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3687 target_u_8x16x3.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u];
3690 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3692 source += 16u * tSourceChannels;
3693 target += 16u * tTargetChannels;
// 4 source channels to 4 target channels.
3700 case (4u | (4u << 4u)):
3702 for (
size_t n = 0; n < blocks16; ++n)
3704 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3706 uint8x16x4_t target_u_8x16x4;
3708 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
// The index is reduced to two bits (0 to 3); the assert checks that the full digit agrees.
3710 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3712 target_u_8x16x4.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u];
3715 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3717 source += 16u * tSourceChannels;
3718 target += 16u * tTargetChannels;
// Scalar loop handling all pixels not consumed by a SIMD loop, one pixel per iteration.
3732 while (source != sourceEnd)
3734 ocean_assert(source < sourceEnd);
// Digit n of the pattern selects the source channel for target channel n.
3736 for (
unsigned int n = 0u; n < tTargetChannels; ++n)
3738 target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
3741 source += tSourceChannels;
3742 target += tTargetChannels;
// NOTE(review): the signature line of this function is not visible in this excerpt; the body reads
// source, target, size and options, shuffles channels and writes a constant to the last target
// channel, so it is presumably FrameChannels::shuffleRowChannelsAndSetLastChannelValue() - confirm.
// Converts one row of size pixels: the first tTargetChannels - 1 target channels are taken from the
// source as described by the pattern (digit n selects the source channel for target channel n),
// the last target channel is set to a constant value read from options.
// NOTE(review): several structural lines (braces, break statements) are not visible in this excerpt.
// @tparam T Element type of the row
// @tparam tSourceChannels Number of source channels, with range [1, 8] (static_assert)
// @tparam tTargetChannels Number of target channels, with range [2, 8] (static_assert)
// @tparam tShufflePattern Shuffle pattern, every digit must be below tSourceChannels (static_assert)
3746template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
3749 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
3750 static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u,
"Invalid channel number!");
// All eight digits of the pattern are verified, including those of unused target channels.
3752 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
3753 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
3754 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
3755 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
3756 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
3757 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
3758 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
3759 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
3761 ocean_assert(source !=
nullptr && target !=
nullptr);
3762 ocean_assert(size != 0);
3764 ocean_assert(options !=
nullptr);
// options is interpreted as a pointer to one value of type T, the constant for the last channel.
3766 const T lastChannelValue = *(
const T*)(options);
3768 const T*
const sourceEnd = source + size * tSourceChannels;
3770#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// Every NEON iteration handles 16 pixels; remaining pixels are left to the scalar loop at the end.
3774 const size_t blocks16 = size /
size_t(16);
// The switch key stores the source channel count in the low digit and the target channel count in
// the next digit.
3776 switch (tSourceChannels | ((tTargetChannels) << 4u))
// 1 source channel to 4 target channels: the source register fills the first three channels.
3779 case (1u | (4u << 4u)):
3781 ocean_assert(tShufflePattern == 0u);
// The constant is broadcast to all 16 lanes once and stays in the fourth channel register.
3783 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3785 uint8x16x4_t target_u_8x16x4;
3786 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3788 for (
size_t n = 0; n < blocks16; ++n)
3790 const uint8x16_t source_u_8x16 = vld1q_u8((
const uint8_t*)source);
3792 for (
unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3794 target_u_8x16x4.val[nT] = source_u_8x16;
3797 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3799 source += 16u * tSourceChannels;
3800 target += 16u * tTargetChannels;
// 3 source channels to 4 target channels.
3807 case (3u | (4u << 4u)):
// The constant is broadcast to all 16 lanes once and stays in the fourth channel register.
3809 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3811 uint8x16x4_t target_u_8x16x4;
3812 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3814 for (
size_t n = 0; n < blocks16; ++n)
3816 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3818 for (
unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
// std::min limits the index to 2, the largest valid index of a 3-register value.
3820 target_u_8x16x4.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)];
3823 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3825 source += 16u * tSourceChannels;
3826 target += 16u * tTargetChannels;
// 4 source channels to 4 target channels.
3833 case (4u | (4u << 4u)):
// The constant is broadcast to all 16 lanes once and stays in the fourth channel register.
3835 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3837 uint8x16x4_t target_u_8x16x4;
3838 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3840 for (
size_t n = 0; n < blocks16; ++n)
3842 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3844 for (
unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
// std::min limits the index to 3, the largest valid index of a 4-register value.
3846 target_u_8x16x4.val[nT] = source_u_8x16x4.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 3u)];
3849 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3851 source += 16u * tSourceChannels;
3852 target += 16u * tTargetChannels;
// Scalar loop handling all pixels not consumed by a NEON loop, one pixel per iteration.
3866 while (source != sourceEnd)
3868 ocean_assert(source < sourceEnd);
// Digit n of the pattern selects the source channel for each target channel except the last one.
3870 for (
unsigned int n = 0u; n < tTargetChannels - 1u; ++n)
3872 target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
// The last target channel receives the constant value.
3873 target[tTargetChannels - 1u] = lastChannelValue;
3876 source += tSourceChannels;
3877 target += tTargetChannels;
// Frame-level channel shuffle: validates the template arguments, derives the row strides and
// delegates the work to FrameConverter::convertGenericPixelFormat() using
// FrameChannels::shuffleRowChannels as the per-row function.
// NOTE(review): the function signature is not part of this excerpt; the parameters used below
// (source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag,
// worker) are known only from their usage here.
3881template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
// Source and target must each have between one and eight channels.
3884 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
3885 static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u,
"Invalid channel number!");
// A 1-channel to 1-channel shuffle is rejected at compile time.
3887 static_assert(tSourceChannels != 1u || tTargetChannels != 1u,
"Invalid channel number!");
// Each of the eight 4-bit nibbles of the shuffle pattern must be smaller than tSourceChannels.
3889 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
3890 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
3891 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
3892 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
3893 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
3894 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
3895 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
3896 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
3898 ocean_assert(source !=
nullptr && target !=
nullptr);
3899 ocean_assert(width >= 1u && height >= 1u);
// Row stride in elements: the row's payload plus the optional padding at the end of the row.
3901 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
3902 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
// True only if neither frame has padding elements.
3904 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// The nullptr argument is presumably the options pointer of the row function (this row function
// needs none) - confirm against FrameConverter::convertGenericPixelFormat().
3906 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous,
nullptr, worker);
// Frame-level channel shuffle which additionally hands a constant channel value to the row
// function: validates the template arguments, derives the row strides and delegates to
// FrameConverter::convertGenericPixelFormat() using
// FrameChannels::shuffleRowChannelsAndSetLastChannelValue as the per-row function.
// NOTE(review): the function signature is not part of this excerpt; the parameters used below
// (source, target, newChannelValue, width, height, sourcePaddingElements, targetPaddingElements,
// conversionFlag, worker) are known only from their usage here.
3909template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
3912 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
// At least two target channels are required.
3913 static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u,
"Invalid channel number!");
// Each of the eight 4-bit nibbles of the shuffle pattern must be smaller than tSourceChannels.
3915 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
3916 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
3917 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
3918 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
3919 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
3920 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
3921 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
3922 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
3924 ocean_assert(source !=
nullptr && target !=
nullptr);
3925 ocean_assert(width >= 1u && height >= 1u);
// Row stride in elements: the row's payload plus the optional padding at the end of the row.
3927 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
3928 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
// True only if neither frame has padding elements.
3930 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// The channel value is copied into a local so that its address can be passed on; the local
// outlives the call below because the call is made from this scope.
3932 const T options = newChannelValue;
3934 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannelsAndSetLastChannelValue<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous, &options, worker);
// Frame-level wrapper which derives the row strides and delegates to
// FrameConverter::convertGenericPixelFormat() using
// FrameChannels::narrowRow16BitPerChannelTo8BitPerChannel as the per-row function.
// NOTE(review): the function signature is not part of this excerpt; the parameters used below
// are known only from their usage here.
3937template <
unsigned int tChannels>
3940 static_assert(tChannels >= 1u,
"Invalid channel number!");
3942 ocean_assert(source !=
nullptr && target !=
nullptr);
3943 ocean_assert(width >= 1u && height >= 1u);
// Both frames have the same number of channels, so the strides differ only by their padding.
3945 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
3946 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
// True only if neither frame has padding elements.
3948 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// The pixel-order reversal function is instantiated for uint8_t elements.
3950 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::narrowRow16BitPerChannelTo8BitPerChannel<tChannels>, FrameChannels::reverseRowPixelOrderInPlace<uint8_t, tChannels>, areContinuous,
nullptr, worker);
// Applies tPixelFunction to a frame by running FrameChannels::applyPixelModifierSubset either
// through a worker or directly for the row range [0, height).
// NOTE(review): the function signature and the branch selecting between the two calls below are
// not part of this excerpt; presumably the worker path is taken when a worker is provided and
// the direct call is the fallback - confirm.
3953template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
3956 static_assert(tChannels > 0u,
"Invalid channel number!");
3958 ocean_assert(source && target);
3959 ocean_assert(width != 0u && height != 0u);
// Worker path: the two trailing 0u arguments of the bound function are placeholders for the
// last two parameters of the subset function (presumably replaced per sub-range by the worker);
// the range handed to executeFunction() is 0u .. height.
3963 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyPixelModifierSubset<T, tChannels, tPixelFunction>, source, target, width, height, conversionFlag, 0u, 0u), 0u, height);
// Direct path: one call starting at row 0 and covering 'height' rows.
3967 applyPixelModifierSubset<T, tChannels, tPixelFunction>(source, target, width, height, conversionFlag, 0u, height);
// Applies tPixelFunction to a frame whose source and target may differ in element type, channel
// count and padding, by running FrameChannels::applyAdvancedPixelModifierSubset either through
// a worker or directly for the row range [0, height).
// NOTE(review): the function signature and the branch selecting between the two calls below are
// not part of this excerpt; presumably the worker path is taken when a worker is provided and
// the direct call is the fallback - confirm.
3971template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
3974 static_assert(tSourceChannels > 0u,
"Invalid source channel number!");
3975 static_assert(tTargetChannels > 0u,
"Invalid target channel number!");
3977 ocean_assert(source && target);
3978 ocean_assert(width != 0u && height != 0u);
// Worker path: the two trailing 0u arguments of the bound function are placeholders for
// firstRow and numberRows of the subset function (presumably replaced per sub-range by the
// worker); the range handed to executeFunction() is 0u .. height.
3982 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>, source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
// Direct path: one call with firstRow = 0 and numberRows = height.
3986 applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>(source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, height);
// Applies the binary operator tOperator to two source frames and writes the result to a target
// frame, by running FrameChannels::applyBivariateOperatorSubset either through a worker or
// directly for the row range [0, height).
// Both source frames share the channel count tSourceChannels; each of the three frames has its
// own padding value.
// NOTE(review): the branch selecting between the two calls in the body is not part of this
// excerpt; presumably the worker path is taken when 'worker' is provided and the direct call is
// the fallback - confirm.
3990template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
3991void FrameChannels::applyBivariateOperator(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
Worker* worker)
3993 static_assert(tSourceChannels > 0u,
"Invalid source channel number!");
3994 static_assert(tTargetChannels > 0u,
"Invalid target channel number!");
3996 ocean_assert(source0 && source1 && target);
3997 ocean_assert(width != 0u && height != 0u);
// Worker path: the two trailing 0u arguments of the bound function are placeholders for
// firstRow and numberRows of the subset function (presumably replaced per sub-range by the
// worker); the range handed to executeFunction() is 0u .. height.
4001 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>, source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
// Direct path: one call with firstRow = 0 and numberRows = height.
4005 FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>(source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, height);
// Applies a caller-provided row operator to every row of a frame, by running
// FrameChannels::applyRowOperatorSubset either through a worker or directly for the row range
// [0, height). The paddings are converted to row strides before they are passed on.
// NOTE(review): the branch selecting between the two calls in the body is not part of this
// excerpt; presumably the worker path is taken when 'worker' is provided and the direct call is
// the fallback - confirm.
4009template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
4010void FrameChannels::applyRowOperator(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction,
Worker* worker)
4012 static_assert(tSourceChannels > 0u,
"Invalid source channel number!");
4013 static_assert(tTargetChannels > 0u,
"Invalid target channel number!");
4015 ocean_assert(source !=
nullptr && target !=
nullptr);
4016 ocean_assert(width != 0u && height != 0u);
// Row stride in elements: the row's payload plus the optional padding at the end of the row.
4018 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
4019 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
// Worker path: the two trailing 0u arguments of the bound function are placeholders for
// firstRow and numberRows of the subset function (presumably replaced per sub-range by the
// worker); the range handed to executeFunction() is 0u .. height.
4023 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>, source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, 0u), 0u, height);
// Direct path: one call with firstRow = 0 and numberRows = height.
4027 applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>(source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, height);
// Frame-level wrapper around FrameChannels::transformGenericSubset: converts all sizes to bytes,
// reinterprets both frames as uint8_t buffers and runs the subset function either distributed
// through a worker (frames with more than 200 rows) or with one direct call.
// NOTE(review): the function signature and the definition of 'rowReversePixelOrderFunction' are
// not part of this excerpt.
4031template <
typename T,
unsigned int tChannels>
4034 ocean_assert(source !=
nullptr && target !=
nullptr);
4035 ocean_assert(width >= 1u && height >= 1u);
// Payload size of one row in bytes (without padding).
4037 const unsigned int bytesPerRow = width *
sizeof(T) * tChannels;
// Row strides in bytes: payload plus padding, the padding being given in elements of type T.
4039 const unsigned int sourceStrideBytes = width *
sizeof(T) * tChannels +
sizeof(T) * sourcePaddingElements;
4040 const unsigned int targetStrideBytes = width *
sizeof(T) * tChannels +
sizeof(T) * targetPaddingElements;
// The worker is used only if one is provided and the frame has more than 200 rows.
4046 if (worker && height > 200u)
// 9u and 10u are the zero-based positions of the two trailing 0u placeholders in the bound
// argument list (presumably the first-row / number-of-rows parameters filled in by the worker);
// 20u is presumably the minimal number of rows per sub-task - confirm against
// Worker::executeFunction().
4048 worker->
executeFunction(
Worker::Function::createStatic(&
FrameChannels::transformGenericSubset, (
const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, 0u), 0u, height, 9u, 10u, 20u);
// Direct path (presumably the fallback branch; the separating 'else' is not visible here):
// one call starting at row 0 and covering 'height' rows.
4052 transformGenericSubset((
const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, height);
// In-place frame-level wrapper around
// FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset (single-frame overload):
// runs the subset function either distributed through a worker (frames with more than 200 rows)
// or with one direct call.
// NOTE(review): the function signature is not part of this excerpt; the parameters used below
// (frame, width, height, framePaddingElements, worker) are known only from their usage here.
4056template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// An alpha frame needs at least two channels, and the alpha index must address one of them.
4059 static_assert(tChannels >= 2u,
"Invalid channel number!");
4060 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4062 ocean_assert(frame !=
nullptr);
4063 ocean_assert(width >= 1u && height >= 1u);
// The worker is used only if one is provided and the frame has more than 200 rows.
4065 if (worker && height > 200u)
// 3u and 4u are the zero-based positions of the two trailing 0u placeholders in the bound
// argument list (presumably first row / number of rows, filled in by the worker); 20u is
// presumably the minimal number of rows per sub-task - confirm.
4067 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
// Direct path (presumably the fallback branch; the separating 'else' is not visible here).
4071 premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
// Source-to-target frame-level wrapper around
// FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset (two-frame overload):
// runs the subset function either distributed through a worker (frames with more than 200 rows)
// or with one direct call.
// NOTE(review): the function signature is not part of this excerpt; the parameters used below
// (source, target, width, height, sourcePaddingElements, targetPaddingElements, worker) are
// known only from their usage here.
4075template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// An alpha frame needs at least two channels, and the alpha index must address one of them.
4078 static_assert(tChannels >= 2u,
"Invalid channel number!");
4079 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4081 ocean_assert(source !=
nullptr && target !=
nullptr);
4082 ocean_assert(width >= 1u && height >= 1u);
// The worker is used only if one is provided and the frame has more than 200 rows.
4084 if (worker && height > 200u)
// 5u and 6u are the zero-based positions of the two trailing 0u placeholders in the bound
// argument list (presumably first row / number of rows, filled in by the worker); 20u is
// presumably the minimal number of rows per sub-task - confirm.
4086 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
// Direct path (presumably the fallback branch; the separating 'else' is not visible here).
4090 premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
// In-place frame-level wrapper around
// FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset (single-frame overload):
// runs the subset function either distributed through a worker (frames with more than 200 rows)
// or with one direct call.
// NOTE(review): the function signature is not part of this excerpt; the parameters used below
// (frame, width, height, framePaddingElements, worker) are known only from their usage here.
4094template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// An alpha frame needs at least two channels, and the alpha index must address one of them.
4097 static_assert(tChannels >= 2u,
"Invalid channel number!");
4098 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4100 ocean_assert(frame !=
nullptr);
4101 ocean_assert(width >= 1u && height >= 1u);
// The worker is used only if one is provided and the frame has more than 200 rows.
4103 if (worker && height > 200u)
// 3u and 4u are the zero-based positions of the two trailing 0u placeholders in the bound
// argument list (presumably first row / number of rows, filled in by the worker); 20u is
// presumably the minimal number of rows per sub-task - confirm.
4105 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
// Direct path (presumably the fallback branch; the separating 'else' is not visible here).
4109 straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
// Source-to-target frame-level wrapper around
// FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset (two-frame overload):
// runs the subset function either distributed through a worker (frames with more than 200 rows)
// or with one direct call.
// NOTE(review): the function signature is not part of this excerpt; the parameters used below
// (source, target, width, height, sourcePaddingElements, targetPaddingElements, worker) are
// known only from their usage here.
4113template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// An alpha frame needs at least two channels, and the alpha index must address one of them.
4116 static_assert(tChannels >= 2u,
"Invalid channel number!");
4117 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4119 ocean_assert(source !=
nullptr && target !=
nullptr);
4120 ocean_assert(width >= 1u && height >= 1u);
// The worker is used only if one is provided and the frame has more than 200 rows.
4122 if (worker && height > 200u)
// 5u and 6u are the zero-based positions of the two trailing 0u placeholders in the bound
// argument list (presumably first row / number of rows, filled in by the worker); 20u is
// presumably the minimal number of rows per sub-task - confirm.
4124 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
// Direct path (presumably the fallback branch; the separating 'else' is not visible here).
4128 straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
// Row-level conversion of 16-bit elements to 8-bit elements for 'size' pixels with tChannels
// channels each: an optional NEON block loop followed by a scalar loop for the remaining pixels.
// NOTE(review): the function signature is not part of this excerpt; 'source' (uint16_t elements),
// 'target' (uint8_t elements) and 'size' are known only from their usage here.
4132template <
unsigned int tChannels>
4135 static_assert(tChannels >= 1u,
"Invalid channel number!");
4137 ocean_assert(source !=
nullptr && target !=
nullptr);
4138 ocean_assert(size > 0);
// One element past the last source element of the row; terminates the scalar loop below.
4140 const uint16_t*
const sourceEnd = source + size * tChannels;
4142#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// Number of complete blocks of eight pixels.
4144 const size_t blocks8 = size /
size_t(8);
// NOTE(review): each iteration below handles 32 elements while the pointers advance by
// 8 * tChannels elements; both match only for tChannels == 4. The guard which restricts this
// loop to a specific channel count is not visible in this excerpt - confirm.
4150 for (
size_t n = 0; n < blocks8; ++n)
// Four loads of eight 16-bit elements each.
4152 const uint16x8_t sourceA_u_16x8 = vld1q_u16(source + 0);
4153 const uint16x8_t sourceB_u_16x8 = vld1q_u16(source + 8);
4154 const uint16x8_t sourceC_u_16x8 = vld1q_u16(source + 16);
4155 const uint16x8_t sourceD_u_16x8 = vld1q_u16(source + 24);
// vqrshrn_n_u16(x, 8): narrowing right shift by eight bits with rounding and saturation.
4157 const uint8x16_t targetAB_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceA_u_16x8, 8), vqrshrn_n_u16(sourceB_u_16x8, 8));
4158 const uint8x16_t targetCD_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceC_u_16x8, 8), vqrshrn_n_u16(sourceD_u_16x8, 8));
4160 vst1q_u8(target + 0, targetAB_u_8x16);
4161 vst1q_u8(target + 16, targetCD_u_8x16);
4163 source += 8u * tChannels;
4164 target += 8u * tChannels;
// Scalar loop: handles all pixels not covered by the block loop above, one pixel per iteration.
4176 while (source != sourceEnd)
4178 ocean_assert(source < sourceEnd);
4180 for (
unsigned int n = 0u; n < tChannels; ++n)
// Keeps the upper eight bits of each element (truncation, no rounding).
// NOTE(review): the NEON path above rounds, so for the same input both paths can differ by one.
4182 ocean_assert((uint16_t)(source[n] >> 8u) <= 255u);
4183 target[n] = (uint8_t)(source[n] >> 8u);
4186 source += tChannels;
4187 target += tChannels;
// Row-level function which interleaves one row of a multi-channel source frame with one row of
// a 1-channel source frame into a target frame with tSourceChannels + 1 channels. The additional
// channel is written in front of the source channels if tAddToFront is true, otherwise behind.
// NOTE(review): the function signature and the definitions of 'flipTarget' and 'mirrorTarget'
// are not part of this excerpt; 'sources', 'targets', 'multipleRowIndex', 'width', 'height' and
// 'options' are known only from their usage here.
4191template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
4194 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
4195 static_assert(
sizeof(
size_t) ==
sizeof(
const T*),
"Invalid pointer size!");
4197 ocean_assert(sources !=
nullptr && targets !=
nullptr);
4198 ocean_assert(width != 0u && height != 0u);
4199 ocean_assert(multipleRowIndex < height);
4200 ocean_assert(options !=
nullptr);
// sources[0] is the multi-channel frame, sources[1] the 1-channel frame, targets[0] the target.
4202 const T* source = (
const T*)(sources[0]);
4203 const T* sourceOneChannel = (
const T*)(sources[1]);
4204 ocean_assert(source !=
nullptr && sourceOneChannel !=
nullptr);
4206 T* target = (T*)(targets[0]);
4207 ocean_assert(target !=
nullptr);
// The options are read as three unsigned int values holding the padding elements of the
// multi-channel source, the 1-channel source and the target (in this order).
4209 const unsigned int* uintOptions = (
const unsigned int*)options;
4210 ocean_assert(uintOptions !=
nullptr);
4212 const unsigned int sourcePaddingElements = uintOptions[0];
4213 const unsigned int sourceOneChannelPaddingElements = uintOptions[1];
4214 const unsigned int targetPaddingElements = uintOptions[2];
4216 const unsigned int targetChannels = tSourceChannels + 1u;
// Row strides in elements: payload plus padding.
4218 const unsigned int sourceStrideElements = tSourceChannels * width + sourcePaddingElements;
4219 const unsigned int sourceOneChannelStrideElements = width + sourceOneChannelPaddingElements;
4220 const unsigned int targetStrideElements = targetChannels * width + targetPaddingElements;
// The source rows are always read at 'multipleRowIndex'; the target row is counted from the
// bottom of the frame if 'flipTarget' is set.
4225 const T* sourceRow = source + sourceStrideElements * multipleRowIndex;
4226 const T* sourceOneChannelRow = sourceOneChannel + sourceOneChannelStrideElements * multipleRowIndex;
4227 T* targetRow = flipTarget ? target + targetStrideElements * (height - multipleRowIndex - 1u) : target + targetStrideElements * multipleRowIndex;
// Not mirrored: target pixels are written from left to right.
4229 if (mirrorTarget ==
false)
4231 for (
unsigned int n = 0u; n < width; ++n)
4233 if constexpr (tAddToFront)
// Additional channel first, then all source channels.
4235 targetRow[0] = sourceOneChannelRow[0];
4237 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4239 targetRow[c + 1u] = sourceRow[c];
// All source channels first, then the additional channel.
4244 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4246 targetRow[c] = sourceRow[c];
4249 targetRow[tSourceChannels] = sourceOneChannelRow[0];
4252 sourceRow += tSourceChannels;
4253 sourceOneChannelRow++;
4255 targetRow += targetChannels;
// Mirrored: start at the last target pixel of the row and walk backwards while the sources are
// still read from left to right.
4260 targetRow += targetChannels * (width - 1u);
4262 for (
unsigned int n = 0u; n < width; ++n)
4264 if constexpr (tAddToFront)
4266 targetRow[0] = sourceOneChannelRow[0];
4268 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4270 targetRow[c + 1u] = sourceRow[c];
4275 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4277 targetRow[c] = sourceRow[c];
4280 targetRow[tSourceChannels] = sourceOneChannelRow[0];
4283 sourceRow += tSourceChannels;
4284 sourceOneChannelRow++;
4286 targetRow -= targetChannels;
// Row-level function which copies 'size' pixels with tSourceChannels channels into a target row
// with tSourceChannels + 1 channels; the additional channel of every pixel receives one constant
// value, placed in front of the source channels if tAddToFront is true, otherwise behind.
// NOTE(review): the function signature is not part of this excerpt; 'source', 'target', 'size'
// and 'channelValueParameter' are known only from their usage here.
4291template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
4294 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
4296 ocean_assert(source !=
nullptr && target !=
nullptr);
4297 ocean_assert(size > 0);
4298 ocean_assert(channelValueParameter !=
nullptr);
// The parameter is interpreted as a pointer to one value of type T.
4300 const T& channelValue = *((
const T*)channelValueParameter);
4302 const unsigned int targetChannels = tSourceChannels + 1u;
4304 for (
size_t n = 0; n < size; ++n)
4306 if constexpr (tAddToFront)
// Constant value first, then all source channels.
4308 target[0] = channelValue;
4310 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4312 target[c + 1u] = source[c];
// All source channels first, then the constant value.
4317 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4319 target[c] = source[c];
4322 target[tSourceChannels] = channelValue;
4325 source += tSourceChannels;
4326 target += targetChannels;
// Row-level function which copies one channel of every source pixel to one channel of the
// corresponding target pixel for 'size' pixels; all other target channels are left untouched.
// NOTE(review): the function signature is not part of this excerpt; 'source', 'target' and
// 'size' are known only from their usage here.
4330template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
4333 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
4334 static_assert(tTargetChannels != 0u,
"Invalid channel number!");
// Both channel indices must address an existing channel of their frame.
4336 static_assert(tSourceChannelIndex < tSourceChannels,
"Invalid channel number!");
4337 static_assert(tTargetChannelIndex < tTargetChannels,
"Invalid channel number!");
4339 ocean_assert(source !=
nullptr && target !=
nullptr);
4340 ocean_assert(size > 0);
4342 for (
size_t n = 0; n < size; ++n)
4344 target[tTargetChannelIndex] = source[tSourceChannelIndex];
// Advance both pointers by one pixel.
4346 source += tSourceChannels;
4347 target += tTargetChannels;
4351template <
typename TSource,
typename TTarget>
4352void FrameChannels::separateTo1ChannelRuntime(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
4354 ocean_assert(sourceFrame !=
nullptr);
4355 ocean_assert(targetFrames !=
nullptr);
4357 ocean_assert(width != 0u && height != 0u);
4358 ocean_assert(channels != 0u);
4361 for (
unsigned int c = 0u; c < channels; ++c)
4363 ocean_assert(targetFrames[c] !=
nullptr);
4367 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements ==
nullptr)
4369 for (
unsigned int n = 0u; n < width * height; ++n)
4371 for (
unsigned int c = 0u; c < channels; ++c)
4373 targetFrames[c][n] = TTarget(sourceFrame[n * channels + c]);
4377 else if (targetFramesPaddingElements ==
nullptr)
4379 ocean_assert(sourceFramePaddingElements != 0u);
4381 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4383 for (
unsigned int y = 0u; y < height; ++y)
4385 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4387 const unsigned int targetRowOffset = y * width;
4389 for (
unsigned int x = 0u; x < width; ++x)
4391 for (
unsigned int c = 0u; c < channels; ++c)
4393 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * channels + c));
4400 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4402 Indices32 targetFrameStrideElements(channels);
4404 for (
unsigned int c = 0u; c < channels; ++c)
4406 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
4409 for (
unsigned int y = 0u; y < height; ++y)
4411 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4413 for (
unsigned int x = 0u; x < width; ++x)
4415 for (
unsigned int c = 0u; c < channels; ++c)
4417 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * channels + c));
4424template <
typename TSource,
typename TTarget>
4425void FrameChannels::zipChannelsRuntime(
const TSource*
const* sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
4427 ocean_assert(sourceFrames !=
nullptr);
4428 ocean_assert(targetFrame !=
nullptr);
4430 ocean_assert(width != 0u && height != 0u);
4431 ocean_assert(channels != 0u);
4433 bool allSourceFramesContinuous =
true;
4435 if (sourceFramesPaddingElements !=
nullptr)
4437 for (
unsigned int n = 0u; n < channels; ++n)
4439 if (sourceFramesPaddingElements[n] != 0u)
4441 allSourceFramesContinuous =
false;
4447 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
4449 for (
unsigned int n = 0u; n < width * height; ++n)
4451 for (
unsigned int c = 0u; c < channels; ++c)
4453 targetFrame[n * channels + c] = TTarget(sourceFrames[c][n]);
4459 const unsigned int targetFrameStrideElements = width * channels + targetFramePaddingElements;
4461 Indices32 sourceFrameStrideElements(channels);
4463 for (
unsigned int c = 0u; c < channels; ++c)
4465 if (sourceFramesPaddingElements ==
nullptr)
4467 sourceFrameStrideElements[c] = width;
4471 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
4475 for (
unsigned int y = 0u; y < height; ++y)
4477 TTarget*
const targetRow = targetFrame + y * targetFrameStrideElements;
4479 for (
unsigned int x = 0u; x < width; ++x)
4481 for (
unsigned int c = 0u; c < channels; ++c)
4483 *(targetRow + x * channels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
4490template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
4491void FrameChannels::setChannelSubset(T* frame,
const unsigned int width,
const T value,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows)
4493 static_assert(tChannels >= 1u,
"Invalid channel number!");
4494 static_assert(tChannel < tChannels,
"Invalid channel index!");
4496 ocean_assert(frame !=
nullptr);
4498 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
4500 frame += firstRow * frameStrideElements + tChannel;
4502 for (
unsigned int n = 0u; n < numberRows; ++n)
4504 for (
unsigned int x = 0u; x < width; ++x)
4506 frame[x * tChannels] = value;
4509 frame += frameStrideElements;
// Applies tPixelFunction to every pixel of a subset of rows; the switch on 'conversionFlag'
// selects the order in which the target pixels are addressed.
// Both frames are addressed with a row size of width * tChannels elements, so padding elements
// are not supported by this function.
// NOTE(review): the function signature and the case labels of the switch are not part of this
// excerpt; the four sections below are therefore described by their pointer arithmetic only.
4513template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
4516 static_assert(tChannels >= 1u,
"Invalid channel number");
4518 ocean_assert(source && target);
4519 ocean_assert(source != target);
4521 ocean_assert(numberRows > 0u);
4522 ocean_assert(firstRow + numberRows <= height);
// Elements per row, and the number of target elements written by this call.
4524 const unsigned int widthElements = width * tChannels;
4525 const unsigned int targetBlockSize = widthElements * numberRows;
4527 switch (conversionFlag)
// Section 1: source and target start at the same row and are traversed in the same order.
4531 source += firstRow * widthElements;
4532 target += firstRow * widthElements;
4534 const T*
const targetEnd = target + targetBlockSize;
4536 while (target != targetEnd)
4538 tPixelFunction(source, target);
4540 source += tChannels;
4541 target += tChannels;
// Section 2: the target starts at the beginning of row (height - firstRow - 1); each row is
// written from left to right, then the target steps back to the beginning of the previous row
// (vertical flip).
4549 source += firstRow * widthElements;
4550 target += width * height * tChannels - (firstRow + 1u) * widthElements;
4552 const T*
const targetEnd = target - targetBlockSize;
4554 while (target != targetEnd)
4556 const T*
const targetRowEnd = target + widthElements;
4558 while (target != targetRowEnd)
4560 tPixelFunction(source, target);
4562 source += tChannels;
4563 target += tChannels;
// Back over the row just written plus one further row.
4566 target -= (widthElements << 1);
// Section 3: the target starts one element behind row 'firstRow'; each row is written from
// right to left (the pointer is decremented before each write), then the target jumps to the
// end of the next row (horizontal mirror).
4574 source += firstRow * widthElements;
4575 target += (firstRow + 1u) * widthElements;
4577 const T*
const targetEnd = target + targetBlockSize;
4579 while (target != targetEnd)
4581 const T*
const targetRowEnd = target - widthElements;
4583 while (target != targetRowEnd)
4585 tPixelFunction(source, target -= tChannels);
4587 source += tChannels;
// Forward over the row just written plus one further row.
4590 target += widthElements << 1;
// Section 4: the target starts one element behind row (height - firstRow - 1) and is traversed
// backwards without interruption (vertical flip combined with horizontal mirror).
4598 source += firstRow * widthElements;
4599 target += width * height * tChannels - firstRow * widthElements;
4601 const T*
const targetEnd = target - targetBlockSize;
4603 while (target != targetEnd)
4605 tPixelFunction(source, target -= tChannels);
4607 source += tChannels;
// Applies tPixelFunction to every pixel of the rows [firstRow, firstRow + numberRows); source
// and target may differ in element type, channel count and padding. The switch on
// 'conversionFlag' selects the target row and the direction in which the target pixels of a
// row are written; the source is always read top-down and left-to-right.
// NOTE(review): the case labels of the switch are not part of this excerpt; the four sections
// in the body are therefore described by their pointer arithmetic only.
4618template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
4619void FrameChannels::applyAdvancedPixelModifierSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows)
4621 static_assert(tSourceChannels >= 1u,
"Invalid source channel number");
4622 static_assert(tTargetChannels >= 1u,
"Invalid target channel number");
4624 ocean_assert(source && target);
// In-place operation is not supported.
4625 ocean_assert((
void*)source != (
void*)target);
4627 ocean_assert(numberRows != 0u);
4628 ocean_assert(firstRow + numberRows <= height);
// Payload elements per row.
4630 const unsigned int sourceWidthElements = width * tSourceChannels;
4631 const unsigned int targetWidthElements = width * tTargetChannels;
// Row strides in elements: payload plus padding.
4633 const unsigned int sourceStrideElements = sourceWidthElements + sourcePaddingElements;
4634 const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4636 switch (conversionFlag)
// Section 1: same target row, target pixels written from left to right.
4640 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4642 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4643 TTarget* targetPixel = target + rowIndex * targetStrideElements;
4645 for (
unsigned int x = 0u; x < width; ++x)
4647 tPixelFunction(sourcePixel, targetPixel);
4649 sourcePixel += tSourceChannels;
4650 targetPixel += tTargetChannels;
// Section 2: target row (height - rowIndex - 1), target pixels written from left to right
// (vertical flip).
4659 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4661 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4662 TTarget* targetPixel = target + (height - rowIndex - 1u) * targetStrideElements;
4664 for (
unsigned int x = 0u; x < width; ++x)
4666 tPixelFunction(sourcePixel, targetPixel);
4668 sourcePixel += tSourceChannels;
4669 targetPixel += tTargetChannels;
// Section 3: same target row, target pixels written from the last pixel backwards
// (horizontal mirror).
4678 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4680 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4682 TTarget*
const targetRowBegin = target + rowIndex * targetStrideElements;
// Last pixel of the target row.
4683 TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4685 for (
unsigned int x = 0u; x < width; ++x)
4687 ocean_assert(targetPixel >= targetRowBegin);
4688 tPixelFunction(sourcePixel, targetPixel);
4690 sourcePixel += tSourceChannels;
4691 targetPixel -= tTargetChannels;
// Section 4: target row (height - rowIndex - 1), target pixels written from the last pixel
// backwards (vertical flip combined with horizontal mirror).
4700 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4702 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4704 TTarget*
const targetRowBegin = target + (height - rowIndex - 1u) * targetStrideElements;
// Last pixel of the target row.
4705 TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4707 for (
unsigned int x = 0u; x < width; ++x)
4709 ocean_assert(targetPixel >= targetRowBegin);
4710 tPixelFunction(sourcePixel, targetPixel);
4712 sourcePixel += tSourceChannels;
4713 targetPixel -= tTargetChannels;
// Applies the binary operator tOperator pixel-wise to the rows [firstRow, firstRow + numberRows)
// of two source frames and writes the results to a target frame. Both sources share the channel
// count tSourceChannels and are always read top-down and left-to-right; the switch on
// 'conversionFlag' selects the target row and the direction in which the target pixels of a row
// are written.
// TIntermediate is not referenced in the visible part of the body.
// NOTE(review): the case labels of the switch are not part of this excerpt; the four sections
// in the body are therefore described by their pointer arithmetic only.
4725template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
4726void FrameChannels::applyBivariateOperatorSubset(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows)
4728 static_assert(tSourceChannels >= 1u,
"Invalid source channel number");
4729 static_assert(tTargetChannels >= 1u,
"Invalid target channel number");
// The operator must not be a null function pointer.
4730 static_assert(tOperator,
"Invalid operator function");
4732 ocean_assert(source0 !=
nullptr && source1 !=
nullptr && target !=
nullptr);
// Neither source may be the target (no in-place operation).
4733 ocean_assert((
const void*)(source0) != (
const void*)(target));
4734 ocean_assert((
const void*)(source1) != (
const void*)(target));
4736 ocean_assert(numberRows != 0u);
4737 ocean_assert(firstRow + numberRows <= height);
// Row strides in elements: payload plus padding.
4739 const unsigned int source0StrideElements = width * tSourceChannels + source0PaddingElements;
4740 const unsigned int source1StrideElements = width * tSourceChannels + source1PaddingElements;
4742 const unsigned int targetWidthElements = width * tTargetChannels;
4744 const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4746 switch (conversionFlag)
// Section 1: same target row, target pixels written from left to right.
4750 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4752 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4753 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4755 TTarget* rowTarget = target + rowIndex * targetStrideElements;
4756 const TTarget*
const rowTargetEnd = rowTarget + targetWidthElements;
4758 while (rowTarget != rowTargetEnd)
4760 ocean_assert(rowTarget < rowTargetEnd);
4762 tOperator(rowSource0, rowSource1, rowTarget);
4764 rowSource0 += tSourceChannels;
4765 rowSource1 += tSourceChannels;
4767 rowTarget += tTargetChannels;
// Section 2: target row (height - rowIndex - 1), target pixels written from left to right
// (vertical flip).
4776 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4778 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4779 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4781 TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements;
4782 const TTarget*
const rowTargetEnd = rowTarget + targetWidthElements;
4784 while (rowTarget != rowTargetEnd)
4786 ocean_assert(rowTarget < rowTargetEnd);
4788 tOperator(rowSource0, rowSource1, rowTarget);
4790 rowSource0 += tSourceChannels;
4791 rowSource1 += tSourceChannels;
4793 rowTarget += tTargetChannels;
// Section 3: same target row, starting at the last target pixel and walking backwards
// (horizontal mirror); the end marker lies one pixel in front of the first pixel of the row.
4802 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4804 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4805 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4807 TTarget* rowTarget = target + rowIndex * targetStrideElements + targetWidthElements - tTargetChannels;
4808 const TTarget*
const rowTargetEnd = rowTarget - targetWidthElements;
4810 while (rowTarget != rowTargetEnd)
4812 ocean_assert(rowTarget > rowTargetEnd);
4814 tOperator(rowSource0, rowSource1, rowTarget);
4816 rowSource0 += tSourceChannels;
4817 rowSource1 += tSourceChannels;
4819 rowTarget -= tTargetChannels;
// Section 4: target row (height - rowIndex - 1), starting at the last target pixel and walking
// backwards (vertical flip combined with horizontal mirror).
4828 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4830 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4831 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4833 TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements + targetWidthElements - tTargetChannels;
4834 const TTarget*
const rowTargetEnd = rowTarget - targetWidthElements;
4836 while (rowTarget != rowTargetEnd)
4838 ocean_assert(rowTarget > rowTargetEnd);
4840 tOperator(rowSource0, rowSource1, rowTarget);
4842 rowSource0 += tSourceChannels;
4843 rowSource1 += tSourceChannels;
4845 rowTarget -= tTargetChannels;
// Presumably the default branch for an unsupported conversion flag; asserts in debug builds.
4853 ocean_assert(
false &&
"This should never happen!");
// Runs a row operator over a contiguous band of rows of a frame.
// For every row y in [firstRow, firstRow + numberRows) the operator is invoked exactly once with the start
// of source row y and of target row y (base pointer + y * stride, strides counted in elements), followed by
// width, height, y and both strides.
// All preconditions below are verified with static_assert/ocean_assert only.
4858template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
4859void FrameChannels::applyRowOperatorSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction,
const unsigned int firstRow,
const unsigned int numberRows)
4861 static_assert(tSourceChannels >= 1u,
"Invalid source channel number");
4862 static_assert(tTargetChannels >= 1u,
"Invalid target channel number");
// Both buffers must exist and must not start at the same address.
4864 ocean_assert(source !=
nullptr && target !=
nullptr);
4865 ocean_assert((
const void*)source != (
const void*)target);
// A stride must cover at least the payload of one row (width * channels elements).
4867 ocean_assert(width * tSourceChannels <= sourceStrideElements);
4868 ocean_assert(width * tTargetChannels <= targetStrideElements);
4870 ocean_assert(rowOperatorFunction !=
nullptr);
// The requested band must be non-empty and must lie inside the frame.
4872 ocean_assert(numberRows != 0u);
4873 ocean_assert(firstRow + numberRows <= height);
// Rows are visited in ascending order; source and target use the same row index y.
4875 for (
unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4877 rowOperatorFunction(source + y * sourceStrideElements, target + y * targetStrideElements, width, height, y, sourceStrideElements, targetStrideElements);
// Converts a run of pixels with three interleaved channels into one 8 bit value per pixel:
//   target = (c0 * f0 + c1 * f1 + c2 * f2 + 64) >> 7
// where f0..f2 are integer factors with 7 bit precision (their sum is asserted to be 128).
// Each template flag states whether the corresponding factor is non-zero (asserted below).
// NOTE(review): the signature line is not visible in this excerpt; the body uses `source`, `target`,
// `size` and `channelMultiplicationFactors_128`.
4881template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
4884 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2,
"Invalid channel factors!");
// The factors arrive through an untyped pointer and are read as three consecutive unsigned int values.
4886 ocean_assert(channelMultiplicationFactors_128 !=
nullptr);
4887 const unsigned int* channelFactors_128 =
reinterpret_cast<const unsigned int*
>(channelMultiplicationFactors_128);
4888 ocean_assert(channelFactors_128 !=
nullptr);
4890 const unsigned int factorChannel0_128 = channelFactors_128[0];
4891 const unsigned int factorChannel1_128 = channelFactors_128[1];
4892 const unsigned int factorChannel2_128 = channelFactors_128[2];
// Each factor lies in [0, 128] and all three add up to 128, so the rounded result of the formula above
// never exceeds 255 for 8 bit inputs.
4894 ocean_assert(factorChannel0_128 <= 128u && factorChannel1_128 <= 128u && factorChannel2_128 <= 128u);
4895 ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 == 128u);
// The compile-time flags must agree with the run-time factors.
4897 ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
4898 ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
4899 ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
4901 ocean_assert(source !=
nullptr && target !=
nullptr && size >= 1);
// `size` counts target elements, i.e. one per pixel.
4903 const uint8_t*
const targetEnd = target + size;
4905#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// SSE path: whole blocks of 16 pixels; each factor is broadcast to eight 16 bit lanes.
4907 constexpr size_t blockSize = 16;
4908 const size_t blocks = size / blockSize;
4910 const __m128i multiplicationFactors0_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel0_128));
4911 const __m128i multiplicationFactors1_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel1_128));
4912 const __m128i multiplicationFactors2_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel2_128));
// NOTE(review): the kernel call inside this loop is not visible in this excerpt.
4914 for (
size_t n = 0; n < blocks; ++n)
// Advance by one block: 16 pixels * 3 channels in the source, 16 elements in the target.
4918 source += blockSize *
size_t(3);
4919 target += blockSize;
4922#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON path: whole blocks of 8 pixels; each factor is broadcast to eight 8 bit lanes.
4924 constexpr size_t blockSize = 8;
4925 const size_t blocks = size / blockSize;
4927 const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
4928 const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
4929 const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
4931 for (
size_t n = 0; n < blocks; ++n)
4933 convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8);
// Advance by one block: 8 pixels * 3 channels in the source, 8 elements in the target.
4935 source += blockSize *
size_t(3);
4936 target += blockSize;
// Element-wise loop that runs until target reaches targetEnd.
// NOTE(review): the per-pixel advance of `source` is not visible in this excerpt - confirm it exists in the full file.
4941 while (target != targetEnd)
4943 ocean_assert(target < targetEnd);
// Channels whose factor is disabled at compile time contribute nothing.
4945 const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
4946 const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
4947 const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
// + 64 rounds to nearest before the division by 128 (>> 7).
4949 *target++ = (uint8_t)((channel0 + channel1 + channel2 + 64u) >> 7u);
// Converts a run of pixels with four interleaved channels into one 8 bit value per pixel:
//   target = (c0 * f0 + c1 * f1 + c2 * f2 + c3 * f3 + 64) >> 7
// where f0..f3 are integer factors with 7 bit precision (their sum is asserted to be 128).
// Each template flag states whether the corresponding factor is non-zero (asserted below).
// NOTE(review): the signature line is not visible in this excerpt; the body uses `source`, `target`,
// `size` and `channelMultiplicationFactors_128`.
4954template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
4957 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3,
"Invalid channel factors!");
// The factors arrive through an untyped pointer and are read as four consecutive unsigned int values.
4959 ocean_assert(channelMultiplicationFactors_128 !=
nullptr);
4960 const unsigned int* channelFactors_128 =
reinterpret_cast<const unsigned int*
>(channelMultiplicationFactors_128);
4961 ocean_assert(channelFactors_128 !=
nullptr);
4963 const unsigned int factorChannel0_128 = channelFactors_128[0];
4964 const unsigned int factorChannel1_128 = channelFactors_128[1];
4965 const unsigned int factorChannel2_128 = channelFactors_128[2];
4966 const unsigned int factorChannel3_128 = channelFactors_128[3];
// Unlike the 3-channel variant, a single factor may not exceed 127.
// NOTE(review): presumably because the SSE path below packs every factor into one byte that the kernel
// multiplies as a signed 8 bit value - confirm against the kernel.
4968 ocean_assert(factorChannel0_128 <= 127u && factorChannel1_128 <= 127u && factorChannel2_128 <= 127u && factorChannel3_128 <= 127u);
4969 ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 + factorChannel3_128 == 128u);
// The compile-time flags must agree with the run-time factors.
4971 ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
4972 ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
4973 ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
4974 ocean_assert(tUseFactorChannel3 == (factorChannel3_128 != 0u));
4976 ocean_assert(source !=
nullptr && target !=
nullptr && size >= 1);
// `size` counts target elements, i.e. one per pixel.
4978 const uint8_t*
const targetEnd = target + size;
4980#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// SSE path: whole blocks of 16 pixels.
4982 constexpr size_t blockSize = 16;
4983 const size_t blocks = size / blockSize;
// The four factors are packed into one 32 bit value (factor 0 in the lowest byte) and broadcast to all four lanes.
4985 const __m128i m128_multiplicationFactors = _mm_set1_epi32(
int(factorChannel0_128 | (factorChannel1_128 << 8u) | (factorChannel2_128 << 16u) | (factorChannel3_128 << 24u)));
// NOTE(review): the kernel call inside this loop is not visible in this excerpt.
4987 for (
size_t n = 0; n < blocks; ++n)
// Advance by one block: 16 pixels * 4 channels in the source, 16 elements in the target.
4991 source += blockSize *
size_t(4);
4992 target += blockSize;
4995#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON path: whole blocks of 8 pixels; each factor is broadcast to eight 8 bit lanes.
4997 constexpr size_t blockSize = 8;
4998 const size_t blocks = size / blockSize;
5000 const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
5001 const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
5002 const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
5003 const uint8x8_t factorChannel3_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel3_128);
5005 for (
size_t n = 0; n < blocks; ++n)
5007 convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2, tUseFactorChannel3>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8, factorChannel3_128_u_8x8);
// Advance by one block: 8 pixels * 4 channels in the source, 8 elements in the target.
5009 source += blockSize *
size_t(4);
5010 target += blockSize;
// Element-wise loop that runs until target reaches targetEnd.
// NOTE(review): the per-pixel advance of `source` is not visible in this excerpt - confirm it exists in the full file.
5015 while (target != targetEnd)
5017 ocean_assert(target < targetEnd);
// Channels whose factor is disabled at compile time contribute nothing.
5019 const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
5020 const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
5021 const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
5022 const unsigned int channel3 = tUseFactorChannel3 ? (source[3] * factorChannel3_128) : 0u;
// + 64 rounds to nearest before the division by 128 (>> 7).
5024 *target++ = (uint8_t)((channel0 + channel1 + channel2 + channel3 + 64u) >> 7u);
// In-place conversion from premultiplied alpha to straight alpha for a band of rows.
// For every pixel whose alpha value is non-zero, each non-alpha channel becomes
//   min((value * 255 + alpha / 2) / alpha, 255)
// Pixels with an alpha value of zero are left untouched; the alpha channel itself is never written.
// NOTE(review): the signature line is not visible in this excerpt; the body uses `frame`, `width`,
// `framePaddingElements`, `firstRow` and `numberRows`.
5029template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5032 static_assert(tChannels >= 2u,
"Invalid channel number!");
5033 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5035 ocean_assert(frame !=
nullptr);
5036 ocean_assert(width >= 1u);
// Elements per row including padding; used to locate the first row of the band.
5038 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
5040 uint8_t* frameRow = frame + frameStrideElements * firstRow;
// frameRow walks pixel by pixel through the band, so inside the loops it points to the current pixel.
5042 for (
unsigned int y = 0u; y < numberRows; ++y)
5044 for (
unsigned int x = 0u; x < width; ++x)
// Skipping alpha == 0 avoids a division by zero.
5046 if (frameRow[tAlphaChannelIndex])
// Half of alpha, added to the numerator to round to nearest.
5048 const uint8_t alpha_2 = frameRow[tAlphaChannelIndex] / 2u;
5050 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5052 if (channelIndex != tAlphaChannelIndex)
// The clamp protects against color values larger than alpha, which would exceed 255 after scaling.
5054 frameRow[channelIndex] = uint8_t(std::min((frameRow[channelIndex] * 255u + alpha_2) / frameRow[tAlphaChannelIndex], 255u));
// Next pixel.
5059 frameRow += tChannels;
// Skip the padding at the end of the row.
5062 frameRow += framePaddingElements;
// Conversion from premultiplied alpha to straight alpha for a band of rows, reading `source` and writing `target`.
// For every pixel whose alpha value is non-zero, each non-alpha channel is written as
//   min((value * 255 + alpha / 2) / alpha, 255)
// Remaining values (the alpha channel, and all channels of pixels with alpha == 0) are copied unchanged.
// NOTE(review): the signature line and the `else` keywords separating the branches are not visible in this
// excerpt; the body uses `source`, `target`, `width`, `sourcePaddingElements`, `targetPaddingElements`,
// `firstRow` and `numberRows`.
5066template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5069 static_assert(tChannels >= 2u,
"Invalid channel number!");
5070 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5072 ocean_assert(source !=
nullptr && target !=
nullptr);
5073 ocean_assert(width >= 1u);
// Elements per row including padding; used to locate the first row of the band in each buffer.
5075 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5076 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
5078 const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5079 uint8_t* targetRow = target + targetStrideElements * firstRow;
// sourceRow/targetRow walk pixel by pixel through the band, so inside the loops they point to the current pixel.
5081 for (
unsigned int y = 0u; y < numberRows; ++y)
5083 for (
unsigned int x = 0u; x < width; ++x)
// Skipping alpha == 0 avoids a division by zero.
5085 if (sourceRow[tAlphaChannelIndex])
// Half of alpha, added to the numerator to round to nearest.
5087 const uint8_t alpha_2 = sourceRow[tAlphaChannelIndex] / 2u;
5089 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5091 if (channelIndex != tAlphaChannelIndex)
// Clamp to the upper bound 255 with std::min (the previous std::max forced every result to at least 255,
// which the uint8_t cast then truncated), matching the in-place variant of this conversion.
5093 targetRow[channelIndex] = uint8_t(std::min((sourceRow[channelIndex] * 255u + alpha_2) / sourceRow[tAlphaChannelIndex], 255u));
// The alpha channel is copied as is.
5097 targetRow[channelIndex] = sourceRow[channelIndex];
// Pixels with alpha == 0 are copied channel by channel.
5103 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5105 targetRow[channelIndex] = sourceRow[channelIndex];
// Next pixel.
5109 sourceRow += tChannels;
5110 targetRow += tChannels;
// Skip the padding at the end of the row in both buffers.
5113 sourceRow += sourcePaddingElements;
5114 targetRow += targetPaddingElements;
// In-place conversion from straight alpha to premultiplied alpha for a band of rows.
// Each non-alpha channel becomes (value * alpha + 127) / 255; the alpha channel itself is never written.
// NOTE(review): the signature line is not visible in this excerpt; the body uses `frame`, `width`,
// `framePaddingElements`, `firstRow` and `numberRows`.
5118template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5121 static_assert(tChannels >= 2u,
"Invalid channel number!");
5122 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5124 ocean_assert(frame !=
nullptr);
5125 ocean_assert(width >= 1u);
// Elements per row including padding; used to locate the first row of the band.
5127 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
5129 uint8_t* frameRow = frame + frameStrideElements * firstRow;
// frameRow walks pixel by pixel through the band, so inside the loops it points to the current pixel.
5131 for (
unsigned int y = 0u; y < numberRows; ++y)
5133 for (
unsigned int x = 0u; x < width; ++x)
5135 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5137 if (channelIndex != tAlphaChannelIndex)
// + 127 rounds the division by 255 to nearest; the result fits into 8 bit because value and alpha are at most 255.
5139 frameRow[channelIndex] = (frameRow[channelIndex] * frameRow[tAlphaChannelIndex] + 127u) / 255u;
// Next pixel.
5143 frameRow += tChannels;
// Skip the padding at the end of the row.
5146 frameRow += framePaddingElements;
// Conversion from straight alpha to premultiplied alpha for a band of rows, reading `source` and writing `target`.
// Each non-alpha channel is written as (value * alpha + 127) / 255; the alpha channel is copied unchanged.
// NOTE(review): the signature line and the `else` keyword separating the two assignments are not visible in
// this excerpt; the body uses `source`, `target`, `width`, `sourcePaddingElements`, `targetPaddingElements`,
// `firstRow` and `numberRows`.
5150template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5153 static_assert(tChannels >= 2u,
"Invalid channel number!");
5154 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5156 ocean_assert(source !=
nullptr && target !=
nullptr);
5157 ocean_assert(width >= 1u);
// Elements per row including padding; used to locate the first row of the band in each buffer.
5159 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5160 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
5162 const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5163 uint8_t* targetRow = target + targetStrideElements * firstRow;
// sourceRow/targetRow walk pixel by pixel through the band, so inside the loops they point to the current pixel.
5165 for (
unsigned int y = 0u; y < numberRows; ++y)
5167 for (
unsigned int x = 0u; x < width; ++x)
5169 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5171 if (channelIndex != tAlphaChannelIndex)
// + 127 rounds the division by 255 to nearest.
5173 targetRow[channelIndex] = (sourceRow[channelIndex] * sourceRow[tAlphaChannelIndex] + 127u) / 255u;
// The alpha channel is copied as is.
5177 targetRow[channelIndex] = sourceRow[channelIndex];
// Next pixel.
5181 sourceRow += tChannels;
5182 targetRow += tChannels;
// Skip the padding at the end of the row in both buffers.
5185 sourceRow += sourcePaddingElements;
5186 targetRow += targetPaddingElements;
// The code following this directive is compiled only when OCEAN_HARDWARE_SSE_VERSION is defined and >= 41.
5190#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// SSE kernel: reads 48 source bytes (16 pixels with 3 interleaved 8 bit channels) and writes 16 target bytes,
// each being (c0 * f0 + c1 * f1 + c2 * f2 + 64) >> 7.
// NOTE(review): the signature, the de-interleaving step that fills channelN_u_8x16 and the definitions of
// channelN_low_u_8x16 are not visible in this excerpt.
5194 ocean_assert(source !=
nullptr && target !=
nullptr);
// Rounding offset 64 in every 16 bit lane.
5211 const __m128i constant64_u_16x8 = _mm_set1_epi32(0x00400040);
// Three unaligned loads cover the 48 source bytes.
5213 const __m128i sourceA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5214 const __m128i sourceB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5215 const __m128i sourceC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5217 __m128i channel0_u_8x16;
5218 __m128i channel1_u_8x16;
5219 __m128i channel2_u_8x16;
// The upper byte of every 16 bit lane (odd byte positions), moved down into 16 bit lanes.
5228 const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5229 const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5230 const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
// Per-channel products in 16 bit lanes, separately for the low and the high halves.
5234 const __m128i result0_low_u_8x16 = _mm_mullo_epi16(channel0_low_u_8x16, multiplicationFactors0_128_u_16x8);
5235 const __m128i result0_high_u_8x16 = _mm_mullo_epi16(channel0_high_u_8x16, multiplicationFactors0_128_u_16x8);
5237 const __m128i result1_low_u_8x16 = _mm_mullo_epi16(channel1_low_u_8x16, multiplicationFactors1_128_u_16x8);
5238 const __m128i result1_high_u_8x16 = _mm_mullo_epi16(channel1_high_u_8x16, multiplicationFactors1_128_u_16x8);
5240 const __m128i result2_low_u_8x16 = _mm_mullo_epi16(channel2_low_u_8x16, multiplicationFactors2_128_u_16x8);
5241 const __m128i result2_high_u_8x16 = _mm_mullo_epi16(channel2_high_u_8x16, multiplicationFactors2_128_u_16x8);
// Sum of the three products plus the rounding offset, using saturating unsigned 16 bit additions.
5244 const __m128i result128_low_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_low_u_8x16, result1_low_u_8x16), _mm_adds_epu16(result2_low_u_8x16, constant64_u_16x8));
5245 const __m128i result128_high_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_high_u_8x16, result1_high_u_8x16), _mm_adds_epu16(result2_high_u_8x16, constant64_u_16x8));
// Division by 128.
5248 const __m128i result_low_u_8x16 = _mm_srli_epi16(result128_low_u_8x16, 7);
5249 const __m128i result_high_u_8x16 = _mm_srli_epi16(result128_high_u_8x16, 7);
// Re-interleave: low results stay in the lower byte of each lane, high results go into the upper byte.
5252 const __m128i result_u_8x16 = _mm_or_si128(result_low_u_8x16, _mm_slli_epi16(result_high_u_8x16, 8));
// One unaligned store writes the 16 result bytes.
5255 _mm_storeu_si128((__m128i*)target, result_u_8x16);
// SSE kernel: reads 48 source bytes (16 pixels with 3 interleaved 8 bit channels), applies a 3x3 matrix of
// 16 bit factors and writes 48 target bytes.
// factorChannelRC denotes the factor for target channel R and source channel C (see the sums below);
// note that the parameters are listed column by column (00, 10, 20, 01, ...).
// NOTE(review): the de-interleaving of the source, the definitions of channelN_low_u_8x16, the use of the
// bias parameters, the normalization of the sums and the interleaving into resultA/B/C are not visible in
// this excerpt.
5258OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_128_s_16x8,
const __m128i& factorChannel10_128_s_16x8,
const __m128i& factorChannel20_128_s_16x8,
const __m128i& factorChannel01_128_s_16x8,
const __m128i& factorChannel11_128_s_16x8,
const __m128i& factorChannel21_128_s_16x8,
const __m128i& factorChannel02_128_s_16x8,
const __m128i& factorChannel12_128_s_16x8,
const __m128i& factorChannel22_128_s_16x8,
const __m128i& biasChannel0_s_16x8,
const __m128i& biasChannel1_s_16x8,
const __m128i& biasChannel2_s_16x8)
5260 ocean_assert(source !=
nullptr && target !=
nullptr);
// Three unaligned loads cover the 48 source bytes.
5279 const __m128i sourceA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5280 const __m128i sourceB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5281 const __m128i sourceC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5283 __m128i channel0_u_8x16;
5284 __m128i channel1_u_8x16;
5285 __m128i channel2_u_8x16;
// The upper byte of every 16 bit lane (odd byte positions), moved down into 16 bit lanes.
5294 const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5295 const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5296 const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
// resultR = channel0 * factorR0 + channel1 * factorR1 + channel2 * factorR2, in 16 bit lanes (low halves).
5300 __m128i result0_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel02_128_s_16x8));
5301 __m128i result1_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel12_128_s_16x8));
5302 __m128i result2_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel22_128_s_16x8));
// The same sums for the high halves.
5304 __m128i result0_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel02_128_s_16x8));
5305 __m128i result1_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel12_128_s_16x8));
5306 __m128i result2_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel22_128_s_16x8));
// Clamp every signed 16 bit lane to [0, 255] so that it fits into one byte.
5320 const __m128i constant255_s_16x8 = _mm_set1_epi16(255);
5322 result0_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5323 result1_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5324 result2_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5326 result0_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5327 result1_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5328 result2_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
// Re-interleave per channel: low results stay in the lower byte of each lane, high results go into the upper byte.
5331 const __m128i result0_u_8x16 = _mm_or_si128(result0_low_u_8x16, _mm_slli_epi16(result0_high_u_8x16, 8));
5332 const __m128i result1_u_8x16 = _mm_or_si128(result1_low_u_8x16, _mm_slli_epi16(result1_high_u_8x16, 8));
5333 const __m128i result2_u_8x16 = _mm_or_si128(result2_low_u_8x16, _mm_slli_epi16(result2_high_u_8x16, 8));
5335 __m128i resultA_u_8x16;
5336 __m128i resultB_u_8x16;
5337 __m128i resultC_u_8x16;
// Three unaligned stores write the 48 target bytes.
5341 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5342 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5343 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
// SSE kernel: reads 48 source bytes (16 pixels with 3 interleaved 8 bit channels) and writes 48 target bytes,
// working with 32 bit intermediate results.
// NOTE(review): the de-interleaving of the source, the computation of the resultN_*_s_32x4 values from the
// factor and bias parameters, and the interleaving into resultA/B/C are not visible in this excerpt.
5346OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_1024_s_16x8,
const __m128i& factorChannel10_1024_s_16x8,
const __m128i& factorChannel20_1024_s_16x8,
const __m128i& factorChannel01_1024_s_16x8,
const __m128i& factorChannel11_1024_s_16x8,
const __m128i& factorChannel21_1024_s_16x8,
const __m128i& factorChannel02_1024_s_16x8,
const __m128i& factorChannel12_1024_s_16x8,
const __m128i& factorChannel22_1024_s_16x8,
const __m128i& biasChannel0_1024_s_32x4,
const __m128i& biasChannel1_1024_s_32x4,
const __m128i& biasChannel2_1024_s_32x4)
5348 ocean_assert(source !=
nullptr && target !=
nullptr);
// Three unaligned loads cover the 48 source bytes.
5368 const __m128i sourceA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5369 const __m128i sourceB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5370 const __m128i sourceC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5372 __m128i channel0_u_8x16;
5373 __m128i channel1_u_8x16;
5374 __m128i channel2_u_8x16;
// The upper byte of every 16 bit lane (odd byte positions), moved down into 16 bit lanes.
5384 const __m128i channel0_high_u_16x8 = _mm_srli_epi16(channel0_u_8x16, 8);
5385 const __m128i channel1_high_u_16x8 = _mm_srli_epi16(channel1_u_8x16, 8);
5386 const __m128i channel2_high_u_16x8 = _mm_srli_epi16(channel2_u_8x16, 8);
// Four 32 bit accumulators per target channel (low/high halves, each split into parts A and B).
5391 __m128i result0_low_A_s_32x4;
5392 __m128i result0_low_B_s_32x4;
5393 __m128i result0_high_A_s_32x4;
5394 __m128i result0_high_B_s_32x4;
5411 __m128i result1_low_A_s_32x4;
5412 __m128i result1_low_B_s_32x4;
5413 __m128i result1_high_A_s_32x4;
5414 __m128i result1_high_B_s_32x4;
5431 __m128i result2_low_A_s_32x4;
5432 __m128i result2_low_B_s_32x4;
5433 __m128i result2_high_A_s_32x4;
5434 __m128i result2_high_B_s_32x4;
// Merge 32 bit lanes into 16 bit lanes: the lower 16 bits of the 'low' result stay in place, the 'high'
// result is shifted into the upper 16 bits of the same lane.
5454 const __m128i mask_0000FFFF_32x4 = _mm_set1_epi32(0x0000FFFF);
5456 __m128i result0_A_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_A_s_32x4, 16));
5457 __m128i result0_B_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_B_s_32x4, 16));
5459 __m128i result1_A_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_A_s_32x4, 16));
5460 __m128i result1_B_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_B_s_32x4, 16));
5462 __m128i result2_A_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_A_s_32x4, 16));
5463 __m128i result2_B_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_B_s_32x4, 16));
// Pack signed 16 bit lanes to bytes with unsigned saturation, i.e. values are clamped to [0, 255].
5468 const __m128i result0_u_8x16 = _mm_packus_epi16(result0_A_s_16x8, result0_B_s_16x8);
5469 const __m128i result1_u_8x16 = _mm_packus_epi16(result1_A_s_16x8, result1_B_s_16x8);
5470 const __m128i result2_u_8x16 = _mm_packus_epi16(result2_A_s_16x8, result2_B_s_16x8);
5472 __m128i resultA_u_8x16;
5473 __m128i resultB_u_8x16;
5474 __m128i resultC_u_8x16;
// Three unaligned stores write the 48 target bytes.
5478 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5479 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5480 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
// SSE kernel: reads 64 source bytes (16 pixels with 4 interleaved 8 bit channels) and writes 16 target bytes,
// each being the factor-weighted sum of the pixel's four channels, plus 64, divided by 128.
// NOTE(review): the signature is not visible in this excerpt; the body uses `source`, `target` and
// `multiplicationFactors0123_128_s_32x4`.
5485 ocean_assert(source !=
nullptr && target !=
nullptr);
// Rounding offset 64 in every 16 bit lane.
5508 const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
// Four unaligned loads cover the 64 source bytes, four pixels per register.
5510 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5511 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5512 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5513 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 3);
// _mm_maddubs_epi16 multiplies unsigned source bytes with signed factor bytes and adds neighboring products,
// giving two 16 bit partial sums per pixel (channels 0+1 and channels 2+3).
5518 const __m128i intermediateResults0_u_16x8 = _mm_maddubs_epi16(pixelsA_u_8x16, multiplicationFactors0123_128_s_32x4);
5519 const __m128i intermediateResults1_u_16x8 = _mm_maddubs_epi16(pixelsB_u_8x16, multiplicationFactors0123_128_s_32x4);
5520 const __m128i intermediateResults2_u_16x8 = _mm_maddubs_epi16(pixelsC_u_8x16, multiplicationFactors0123_128_s_32x4);
5521 const __m128i intermediateResults3_u_16x8 = _mm_maddubs_epi16(pixelsD_u_8x16, multiplicationFactors0123_128_s_32x4);
// The horizontal add joins the two partial sums of each pixel: one 16 bit sum per pixel, eight pixels per register.
5524 __m128i grayA_u_16x8 = _mm_hadd_epi16(intermediateResults0_u_16x8, intermediateResults1_u_16x8);
5525 __m128i grayB_u_16x8 = _mm_hadd_epi16(intermediateResults2_u_16x8, intermediateResults3_u_16x8);
// Rounding offset.
5528 grayA_u_16x8 = _mm_add_epi16(grayA_u_16x8, constant64_u_8x16);
5529 grayB_u_16x8 = _mm_add_epi16(grayB_u_16x8, constant64_u_8x16);
// Division by 128.
5532 grayA_u_16x8 = _mm_srli_epi16(grayA_u_16x8, 7);
5533 grayB_u_16x8 = _mm_srli_epi16(grayB_u_16x8, 7);
// Pack the sixteen 16 bit results into sixteen bytes (with unsigned saturation).
5540 const __m128i gray_u_8x16 = _mm_packus_epi16(grayA_u_16x8, grayB_u_16x8);
// One unaligned store writes the 16 result bytes.
5543 _mm_storeu_si128((__m128i*)target, gray_u_8x16);
// SSE kernel: reads 64 source bytes (16 pixels with 4 interleaved 8 bit channels) and writes 32 target bytes
// (16 pixels with 2 interleaved 8 bit channels). Each target channel is the factor-weighted sum of the
// pixel's four source channels, plus 64, divided by 128.
// NOTE(review): the signature is not visible in this excerpt; the body uses `source`, `target`,
// `multiplicationFactorsChannel0_0123_128_s_16x8` and `multiplicationFactorsChannel1_0123_128_s_16x8`.
5548 ocean_assert(source !=
nullptr && target !=
nullptr);
// Rounding offset 64 in every 16 bit lane.
5569 const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
// Four unaligned loads cover the 64 source bytes, four pixels per register.
5571 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5572 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5573 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5574 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 3);
// Zero-extend every byte to 16 bit; each resulting register (A..H) holds two pixels.
5578 const __m128i pixelsA_u_16x8 = _mm_unpacklo_epi8(pixelsA_u_8x16, _mm_setzero_si128());
5579 const __m128i pixelsB_u_16x8 = _mm_unpackhi_epi8(pixelsA_u_8x16, _mm_setzero_si128());
5581 const __m128i pixelsC_u_16x8 = _mm_unpacklo_epi8(pixelsB_u_8x16, _mm_setzero_si128());
5582 const __m128i pixelsD_u_16x8 = _mm_unpackhi_epi8(pixelsB_u_8x16, _mm_setzero_si128());
5584 const __m128i pixelsE_u_16x8 = _mm_unpacklo_epi8(pixelsC_u_8x16, _mm_setzero_si128());
5585 const __m128i pixelsF_u_16x8 = _mm_unpackhi_epi8(pixelsC_u_8x16, _mm_setzero_si128());
5587 const __m128i pixelsG_u_16x8 = _mm_unpacklo_epi8(pixelsD_u_8x16, _mm_setzero_si128());
5588 const __m128i pixelsH_u_16x8 = _mm_unpackhi_epi8(pixelsD_u_8x16, _mm_setzero_si128());
// Target channel 0: _mm_madd_epi16 multiplies 16 bit values with the factors and adds neighboring products,
// giving two 32 bit partial sums per pixel.
5594 const __m128i intermediateResultsChannel0_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5595 const __m128i intermediateResultsChannel0_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5596 const __m128i intermediateResultsChannel0_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5597 const __m128i intermediateResultsChannel0_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5598 const __m128i intermediateResultsChannel0_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5599 const __m128i intermediateResultsChannel0_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5600 const __m128i intermediateResultsChannel0_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5601 const __m128i intermediateResultsChannel0_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
// The horizontal add joins the two partial sums of each pixel: one 32 bit sum per pixel, four pixels per register.
5603 const __m128i resultsChannel0_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_0_u_32x4, intermediateResultsChannel0_1_u_32x4);
5604 const __m128i resultsChannel0_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_2_u_32x4, intermediateResultsChannel0_3_u_32x4);
5605 const __m128i resultsChannel0_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_4_u_32x4, intermediateResultsChannel0_5_u_32x4);
5606 const __m128i resultsChannel0_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_6_u_32x4, intermediateResultsChannel0_7_u_32x4);
// Target channel 1: the same computation with the second set of factors.
5609 const __m128i intermediateResultsChannel1_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5610 const __m128i intermediateResultsChannel1_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5611 const __m128i intermediateResultsChannel1_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5612 const __m128i intermediateResultsChannel1_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5613 const __m128i intermediateResultsChannel1_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5614 const __m128i intermediateResultsChannel1_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5615 const __m128i intermediateResultsChannel1_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5616 const __m128i intermediateResultsChannel1_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5618 const __m128i resultsChannel1_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_0_u_32x4, intermediateResultsChannel1_1_u_32x4);
5619 const __m128i resultsChannel1_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_2_u_32x4, intermediateResultsChannel1_3_u_32x4);
5620 const __m128i resultsChannel1_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_4_u_32x4, intermediateResultsChannel1_5_u_32x4);
5621 const __m128i resultsChannel1_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_6_u_32x4, intermediateResultsChannel1_7_u_32x4);
// Interleave both target channels as 16 bit lanes: channel 0 in the lower half of every 32 bit lane,
// channel 1 shifted into the upper half.
// NOTE(review): no mask is applied to the channel 0 sums, so this relies on them being non-negative and
// below 2^16 - confirm the factor constraints guarantee that.
5625 __m128i resultA_u_16x8 = _mm_or_si128(resultsChannel0_A_u_32x4, _mm_slli_epi32(resultsChannel1_A_u_32x4, 16));
5626 __m128i resultB_u_16x8 = _mm_or_si128(resultsChannel0_B_u_32x4, _mm_slli_epi32(resultsChannel1_B_u_32x4, 16));
5627 __m128i resultC_u_16x8 = _mm_or_si128(resultsChannel0_C_u_32x4, _mm_slli_epi32(resultsChannel1_C_u_32x4, 16));
5628 __m128i resultD_u_16x8 = _mm_or_si128(resultsChannel0_D_u_32x4, _mm_slli_epi32(resultsChannel1_D_u_32x4, 16));
// Rounding offset.
5631 resultA_u_16x8 = _mm_add_epi16(resultA_u_16x8, constant64_u_8x16);
5632 resultB_u_16x8 = _mm_add_epi16(resultB_u_16x8, constant64_u_8x16);
5633 resultC_u_16x8 = _mm_add_epi16(resultC_u_16x8, constant64_u_8x16);
5634 resultD_u_16x8 = _mm_add_epi16(resultD_u_16x8, constant64_u_8x16);
// Division by 128.
5637 resultA_u_16x8 = _mm_srli_epi16(resultA_u_16x8, 7);
5638 resultB_u_16x8 = _mm_srli_epi16(resultB_u_16x8, 7);
5639 resultC_u_16x8 = _mm_srli_epi16(resultC_u_16x8, 7);
5640 resultD_u_16x8 = _mm_srli_epi16(resultD_u_16x8, 7);
// Pack the 16 bit lanes into bytes (with unsigned saturation), eight pixels per register.
5647 const __m128i resultAB_u_8x16 = _mm_packus_epi16(resultA_u_16x8, resultB_u_16x8);
5648 const __m128i resultCD_u_8x16 = _mm_packus_epi16(resultC_u_16x8, resultD_u_16x8);
// Two unaligned stores write the 32 target bytes.
5651 _mm_storeu_si128((__m128i*)target + 0, resultAB_u_8x16);
5652 _mm_storeu_si128((__m128i*)target + 1, resultCD_u_8x16);
// The code following this directive is compiled only when OCEAN_HARDWARE_NEON_VERSION is defined and >= 10.
5657#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON kernel: reads 8 pixels with 3 interleaved 8 bit channels and writes 8 target bytes, each being the
// factor-weighted sum of the enabled channels, divided by 128 with rounding.
// NOTE(review): the signature and the `else` keyword of the first branch are not visible in this excerpt;
// the body uses `source`, `target` and `factorChannelN_128_u_8x8`.
5659template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
5662 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2,
"Invalid multiplication factors!");
5664 ocean_assert(source !=
nullptr && target !=
nullptr);
// De-interleaving load: val[0..2] each hold the eight values of one channel.
5683 uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5685 uint16x8_t intermediateResults_u_16x8;
// Channel 0 initializes the 16 bit accumulator (widening multiply) ...
5689 if constexpr (tUseFactorChannel0)
5691 intermediateResults_u_16x8 = vmull_u8(source_u_8x8x3.val[0], factorChannel0_128_u_8x8);
// ... which is zero-initialized when channel 0 is not used.
5695 intermediateResults_u_16x8 = vdupq_n_u16(0u);
// Channels 1 and 2 are added with widening multiply-accumulate when enabled.
5700 if constexpr (tUseFactorChannel1)
5702 intermediateResults_u_16x8 = vmlal_u8(intermediateResults_u_16x8, source_u_8x8x3.val[1], factorChannel1_128_u_8x8);
5707 if constexpr (tUseFactorChannel2)
5709 intermediateResults_u_16x8 = vmlal_u8(intermediateResults_u_16x8, source_u_8x8x3.val[2], factorChannel2_128_u_8x8);
// Rounding right shift by 7 (division by 128) with saturating narrowing to 8 bit.
5713 uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_u_16x8, 7);
// Store the 8 result bytes.
5716 vst1_u8(target, results_u_8x8);
// NEON kernel: reads 8 pixels with 3 interleaved 8 bit channels, subtracts a per-channel bias, applies a
// 3x3 matrix of 16 bit factors with 6 bit precision and writes 8 pixels with 3 interleaved 8 bit channels:
//   targetR = saturate_u8(((source0 - bias0) * factorR0 + (source1 - bias1) * factorR1 + (source2 - bias2) * factorR2 + 32) >> 6)
// factorChannelRC denotes the factor for target channel R and source channel C; note that the parameters
// are listed column by column (00, 10, 20, 01, ...).
5719OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8)
5721 ocean_assert(source !=
nullptr && target !=
nullptr);
// De-interleaving load: val[0..2] each hold the eight values of one channel.
5741 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
// Widening subtraction of the bias; the unsigned 16 bit result is reinterpreted as signed so that
// source values below the bias become negative numbers.
5744 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[0], biasChannel0_u_8x8));
5745 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[1], biasChannel1_u_8x8));
5746 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[2], biasChannel2_u_8x8));
// Contribution of source channel 0 to the three target channels.
5750 int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_64_s_16x8);
5751 int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_64_s_16x8);
5752 int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_64_s_16x8);
// Contribution of source channel 1, added with saturation.
5754 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, vmulq_s16(source1_s_16x8, factorChannel01_64_s_16x8));
5755 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, vmulq_s16(source1_s_16x8, factorChannel11_64_s_16x8));
5756 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, vmulq_s16(source1_s_16x8, factorChannel21_64_s_16x8));
// Contribution of source channel 2, added with saturation.
5758 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, vmulq_s16(source2_s_16x8, factorChannel02_64_s_16x8));
5759 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, vmulq_s16(source2_s_16x8, factorChannel12_64_s_16x8));
5760 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, vmulq_s16(source2_s_16x8, factorChannel22_64_s_16x8));
5762 uint8x8x3_t results_u_8x8x3;
// Rounding right shift by 6 (division by 64) with saturating narrowing from signed 16 bit to unsigned 8 bit.
5765 results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 6);
5766 results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 6);
5767 results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 6);
// Interleaving store of the 8 result pixels.
5770 vst3_u8(target, results_u_8x8x3);
5773OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8)
5775 ocean_assert(source !=
nullptr && target !=
nullptr);
5790 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
5793 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
5794 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
5795 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
5797 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
5798 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
5799 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
5803 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
5804 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
5805 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
5807 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
5808 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
5809 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
5811 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8));
5812 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
5813 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
5815 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
5816 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
5817 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
5819 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
5820 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
5821 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
5823 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
5824 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
5825 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
5827 uint8x16x3_t results_u_8x16x3;
5830 results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
5831 results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
5832 results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
5835 vst3q_u8(target, results_u_8x16x3);
5838OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8)
5840 ocean_assert(source !=
nullptr && target !=
nullptr);
5860 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5862 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
5863 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
5864 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
5866 int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_128_s_16x8);
5867 int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_128_s_16x8);
5868 int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_128_s_16x8);
5870 intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source1_s_16x8, factorChannel01_128_s_16x8);
5871 intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source1_s_16x8, factorChannel11_128_s_16x8);
5872 intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source1_s_16x8, factorChannel21_128_s_16x8);
5874 intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source2_s_16x8, factorChannel02_128_s_16x8);
5875 intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source2_s_16x8, factorChannel12_128_s_16x8);
5876 intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source2_s_16x8, factorChannel22_128_s_16x8);
5880 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, biasChannel0_128_s_16x8);
5881 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, biasChannel1_128_s_16x8);
5882 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, biasChannel2_128_s_16x8);
5884 uint8x8x3_t results_u_8x8x3;
5887 results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 7);
5888 results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 7);
5889 results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 7);
5892 vst3_u8(target, results_u_8x8x3);
5895OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4)
5897 ocean_assert(source !=
nullptr && target !=
nullptr);
5918 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5920 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
5921 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
5922 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
5924 const int16x4_t source0_low_s_16x4 = vget_low_s16(source0_s_16x8);
5925 const int16x4_t source0_high_s_16x4 = vget_high_s16(source0_s_16x8);
5927 int32x4_t intermediateResults0_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel00_1024_s_16x4);
5928 int32x4_t intermediateResults0_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel00_1024_s_16x4);
5930 int32x4_t intermediateResults1_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel10_1024_s_16x4);
5931 int32x4_t intermediateResults1_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel10_1024_s_16x4);
5933 int32x4_t intermediateResults2_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel20_1024_s_16x4);
5934 int32x4_t intermediateResults2_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel20_1024_s_16x4);
5937 const int16x4_t source1_low_s_16x4 = vget_low_s16(source1_s_16x8);
5938 const int16x4_t source1_high_s_16x4 = vget_high_s16(source1_s_16x8);
5940 intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source1_low_s_16x4, factorChannel01_1024_s_16x4);
5941 intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source1_high_s_16x4, factorChannel01_1024_s_16x4);
5943 intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source1_low_s_16x4, factorChannel11_1024_s_16x4);
5944 intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source1_high_s_16x4, factorChannel11_1024_s_16x4);
5946 intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source1_low_s_16x4, factorChannel21_1024_s_16x4);
5947 intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source1_high_s_16x4, factorChannel21_1024_s_16x4);
5950 const int16x4_t source2_low_s_16x4 = vget_low_s16(source2_s_16x8);
5951 const int16x4_t source2_high_s_16x4 = vget_high_s16(source2_s_16x8);
5953 intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source2_low_s_16x4, factorChannel02_1024_s_16x4);
5954 intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source2_high_s_16x4, factorChannel02_1024_s_16x4);
5956 intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source2_low_s_16x4, factorChannel12_1024_s_16x4);
5957 intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source2_high_s_16x4, factorChannel12_1024_s_16x4);
5959 intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source2_low_s_16x4, factorChannel22_1024_s_16x4);
5960 intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source2_high_s_16x4, factorChannel22_1024_s_16x4);
5965 intermediateResults0_low_s_32x4 = vaddq_s32(intermediateResults0_low_s_32x4, biasChannel0_1024_s_32x4);
5966 intermediateResults0_high_s_32x4 = vaddq_s32(intermediateResults0_high_s_32x4, biasChannel0_1024_s_32x4);
5968 intermediateResults1_low_s_32x4 = vaddq_s32(intermediateResults1_low_s_32x4, biasChannel1_1024_s_32x4);
5969 intermediateResults1_high_s_32x4 = vaddq_s32(intermediateResults1_high_s_32x4, biasChannel1_1024_s_32x4);
5971 intermediateResults2_low_s_32x4 = vaddq_s32(intermediateResults2_low_s_32x4, biasChannel2_1024_s_32x4);
5972 intermediateResults2_high_s_32x4 = vaddq_s32(intermediateResults2_high_s_32x4, biasChannel2_1024_s_32x4);
5975 uint8x8x3_t results_u_8x8x3;
5978 results_u_8x8x3.val[0] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_high_s_32x4, 10)));
5979 results_u_8x8x3.val[1] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_high_s_32x4, 10)));
5980 results_u_8x8x3.val[2] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_high_s_32x4, 10)));
5983 vst3_u8(target, results_u_8x8x3);
5986OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4)
5988 ocean_assert(source !=
nullptr && target !=
nullptr);
6009 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6011 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6012 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6013 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
6015 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6016 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6017 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
6019 const int16x4_t source0_A_s_16x4 = vget_low_s16(source0_low_s_16x8);
6020 const int16x4_t source0_B_s_16x4 = vget_high_s16(source0_low_s_16x8);
6021 const int16x4_t source0_C_s_16x4 = vget_low_s16(source0_high_s_16x8);
6022 const int16x4_t source0_D_s_16x4 = vget_high_s16(source0_high_s_16x8);
6024 int32x4_t intermediateResults0_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel00_1024_s_16x4);
6025 int32x4_t intermediateResults0_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel00_1024_s_16x4);
6026 int32x4_t intermediateResults0_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel00_1024_s_16x4);
6027 int32x4_t intermediateResults0_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel00_1024_s_16x4);
6029 int32x4_t intermediateResults1_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel10_1024_s_16x4);
6030 int32x4_t intermediateResults1_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel10_1024_s_16x4);
6031 int32x4_t intermediateResults1_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel10_1024_s_16x4);
6032 int32x4_t intermediateResults1_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel10_1024_s_16x4);
6034 int32x4_t intermediateResults2_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel20_1024_s_16x4);
6035 int32x4_t intermediateResults2_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel20_1024_s_16x4);
6036 int32x4_t intermediateResults2_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel20_1024_s_16x4);
6037 int32x4_t intermediateResults2_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel20_1024_s_16x4);
6040 const int16x4_t source1_A_s_16x4 = vget_low_s16(source1_low_s_16x8);
6041 const int16x4_t source1_B_s_16x4 = vget_high_s16(source1_low_s_16x8);
6042 const int16x4_t source1_C_s_16x4 = vget_low_s16(source1_high_s_16x8);
6043 const int16x4_t source1_D_s_16x4 = vget_high_s16(source1_high_s_16x8);
6045 intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source1_A_s_16x4, factorChannel01_1024_s_16x4);
6046 intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source1_B_s_16x4, factorChannel01_1024_s_16x4);
6047 intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source1_C_s_16x4, factorChannel01_1024_s_16x4);
6048 intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source1_D_s_16x4, factorChannel01_1024_s_16x4);
6050 intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source1_A_s_16x4, factorChannel11_1024_s_16x4);
6051 intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source1_B_s_16x4, factorChannel11_1024_s_16x4);
6052 intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source1_C_s_16x4, factorChannel11_1024_s_16x4);
6053 intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source1_D_s_16x4, factorChannel11_1024_s_16x4);
6055 intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source1_A_s_16x4, factorChannel21_1024_s_16x4);
6056 intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source1_B_s_16x4, factorChannel21_1024_s_16x4);
6057 intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source1_C_s_16x4, factorChannel21_1024_s_16x4);
6058 intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source1_D_s_16x4, factorChannel21_1024_s_16x4);
6061 const int16x4_t source2_A_s_16x4 = vget_low_s16(source2_low_s_16x8);
6062 const int16x4_t source2_B_s_16x4 = vget_high_s16(source2_low_s_16x8);
6063 const int16x4_t source2_C_s_16x4 = vget_low_s16(source2_high_s_16x8);
6064 const int16x4_t source2_D_s_16x4 = vget_high_s16(source2_high_s_16x8);
6066 intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source2_A_s_16x4, factorChannel02_1024_s_16x4);
6067 intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source2_B_s_16x4, factorChannel02_1024_s_16x4);
6068 intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source2_C_s_16x4, factorChannel02_1024_s_16x4);
6069 intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source2_D_s_16x4, factorChannel02_1024_s_16x4);
6071 intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source2_A_s_16x4, factorChannel12_1024_s_16x4);
6072 intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source2_B_s_16x4, factorChannel12_1024_s_16x4);
6073 intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source2_C_s_16x4, factorChannel12_1024_s_16x4);
6074 intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source2_D_s_16x4, factorChannel12_1024_s_16x4);
6076 intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source2_A_s_16x4, factorChannel22_1024_s_16x4);
6077 intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source2_B_s_16x4, factorChannel22_1024_s_16x4);
6078 intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source2_C_s_16x4, factorChannel22_1024_s_16x4);
6079 intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source2_D_s_16x4, factorChannel22_1024_s_16x4);
6084 intermediateResults0_A_s_32x4 = vaddq_s32(intermediateResults0_A_s_32x4, biasChannel0_1024_s_32x4);
6085 intermediateResults0_B_s_32x4 = vaddq_s32(intermediateResults0_B_s_32x4, biasChannel0_1024_s_32x4);
6086 intermediateResults0_C_s_32x4 = vaddq_s32(intermediateResults0_C_s_32x4, biasChannel0_1024_s_32x4);
6087 intermediateResults0_D_s_32x4 = vaddq_s32(intermediateResults0_D_s_32x4, biasChannel0_1024_s_32x4);
6089 intermediateResults1_A_s_32x4 = vaddq_s32(intermediateResults1_A_s_32x4, biasChannel1_1024_s_32x4);
6090 intermediateResults1_B_s_32x4 = vaddq_s32(intermediateResults1_B_s_32x4, biasChannel1_1024_s_32x4);
6091 intermediateResults1_C_s_32x4 = vaddq_s32(intermediateResults1_C_s_32x4, biasChannel1_1024_s_32x4);
6092 intermediateResults1_D_s_32x4 = vaddq_s32(intermediateResults1_D_s_32x4, biasChannel1_1024_s_32x4);
6094 intermediateResults2_A_s_32x4 = vaddq_s32(intermediateResults2_A_s_32x4, biasChannel2_1024_s_32x4);
6095 intermediateResults2_B_s_32x4 = vaddq_s32(intermediateResults2_B_s_32x4, biasChannel2_1024_s_32x4);
6096 intermediateResults2_C_s_32x4 = vaddq_s32(intermediateResults2_C_s_32x4, biasChannel2_1024_s_32x4);
6097 intermediateResults2_D_s_32x4 = vaddq_s32(intermediateResults2_D_s_32x4, biasChannel2_1024_s_32x4);
6100 uint8x16x3_t results_u_8x16x3;
6103 results_u_8x16x3.val[0] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_D_s_32x4, 10))));
6105 results_u_8x16x3.val[1] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_D_s_32x4, 10))));
6106 results_u_8x16x3.val[2] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_D_s_32x4, 10))));
6109 vst3q_u8(target, results_u_8x16x3);
6112OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8)
6114 ocean_assert(source !=
nullptr && target !=
nullptr);
6134 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6136 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6137 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6138 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
6140 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6141 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6142 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
6145 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_128_s_16x8);
6146 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_128_s_16x8);
6147 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_128_s_16x8);
6149 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_128_s_16x8);
6150 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_128_s_16x8);
6151 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_128_s_16x8);
6154 intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source1_low_s_16x8, factorChannel01_128_s_16x8);
6155 intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source1_low_s_16x8, factorChannel11_128_s_16x8);
6156 intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source1_low_s_16x8, factorChannel21_128_s_16x8);
6158 intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source1_high_s_16x8, factorChannel01_128_s_16x8);
6159 intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source1_high_s_16x8, factorChannel11_128_s_16x8);
6160 intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source1_high_s_16x8, factorChannel21_128_s_16x8);
6163 intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source2_low_s_16x8, factorChannel02_128_s_16x8);
6164 intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source2_low_s_16x8, factorChannel12_128_s_16x8);
6165 intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source2_low_s_16x8, factorChannel22_128_s_16x8);
6167 intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source2_high_s_16x8, factorChannel02_128_s_16x8);
6168 intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source2_high_s_16x8, factorChannel12_128_s_16x8);
6169 intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source2_high_s_16x8, factorChannel22_128_s_16x8);
6173 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, biasChannel0_128_s_16x8);
6174 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, biasChannel0_128_s_16x8);
6176 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, biasChannel1_128_s_16x8);
6177 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, biasChannel1_128_s_16x8);
6179 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, biasChannel2_128_s_16x8);
6180 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, biasChannel2_128_s_16x8);
6183 uint8x16x3_t results_u_8x16x3;
6186 results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 7));
6187 results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 7));
6188 results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 7));
6191 vst3q_u8(target, results_u_8x16x3);
6194OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8,
const uint8x16_t& channelValue3_u_8x16)
6196 ocean_assert(source !=
nullptr && target !=
nullptr);
6211 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6214 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6215 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6216 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
6218 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6219 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6220 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
6224 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
6225 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
6226 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
6228 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
6229 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
6230 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
6232 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8));
6233 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
6234 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
6236 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
6237 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
6238 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
6240 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
6241 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
6242 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
6244 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
6245 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
6246 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
6248 uint8x16x4_t results_u_8x16x4;
6251 results_u_8x16x4.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
6252 results_u_8x16x4.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
6253 results_u_8x16x4.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
6254 results_u_8x16x4.val[3] = channelValue3_u_8x16;
6257 vst4q_u8(target, results_u_8x16x4);
// NEON kernel: combines the four channels of 8 interleaved 8-bit pixels into 8 single-channel 8-bit values.
// NOTE(review): the function header (name, parameter list) and the braces are not visible in this excerpt;
// `source`, `target` and the `factorChannel*_128_u_8x8` vectors are presumably function parameters - confirm against the full file.
// The four boolean template parameters decide at compile time which source channels contribute to the sum.
6260template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
// at least one channel has to contribute to the result
6263 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3,
"Invalid multiplication factors!");
6265 ocean_assert(source !=
nullptr && target !=
nullptr);
// de-interleaving load of 32 bytes: val[c] receives channel c of the 8 pixels
6285 uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
// 16 bit accumulator holding the weighted sum for each of the 8 pixels
6287 uint16x8_t intermediateResults_16x8;
6291 if constexpr (tUseFactorChannel0)
// widening multiplication (8 bit x 8 bit -> 16 bit) starts the accumulator
6293 intermediateResults_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel0_128_u_8x8);
// NOTE(review): no `else` is visible in this excerpt; presumably this zero initialization is the
// alternative branch for tUseFactorChannel0 == false - confirm, as otherwise it would discard the product above
6297 intermediateResults_16x8 = vdupq_n_u16(0u);
6302 if constexpr (tUseFactorChannel1)
// widening multiply-accumulate of the second channel
6304 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[1], factorChannel1_128_u_8x8);
6309 if constexpr (tUseFactorChannel2)
// widening multiply-accumulate of the third channel
6311 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[2], factorChannel2_128_u_8x8);
6316 if constexpr (tUseFactorChannel3)
// widening multiply-accumulate of the fourth channel
6318 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[3], factorChannel3_128_u_8x8);
// rounding right shift by 7 bits (division by 128) with saturating narrowing to 8 bit;
// the `_128_` in the factor names suggests factors expressed in units of 1/128 - TODO confirm with the declaration
6322 uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_16x8, 7);
// writes the 8 resulting bytes
6325 vst1_u8(target, results_u_8x8);
// Converts 8 pixels with 4 interleaved 8-bit channels to 8 pixels with 2 interleaved 8-bit channels.
// Each target channel is a weighted sum of the four source channels, normalized by a rounding shift of 7 bits.
// Factor naming as used below: factorChannel<T><S> is the weight of source channel S for target channel T.
// source: 8 pixels * 4 channels = 32 bytes are read; target: 8 pixels * 2 channels = 16 bytes are written.
// NOTE(review): brace-only lines are absent from this listing.
6328OCEAN_FORCE_INLINE
void FrameChannels::convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const uint8x8_t& factorChannel00_128_u_8x8,
const uint8x8_t& factorChannel10_128_u_8x8,
const uint8x8_t& factorChannel01_128_u_8x8,
const uint8x8_t& factorChannel11_128_u_8x8,
const uint8x8_t& factorChannel02_128_u_8x8,
const uint8x8_t& factorChannel12_128_u_8x8,
const uint8x8_t& factorChannel03_128_u_8x8,
const uint8x8_t& factorChannel13_128_u_8x8)
6330 ocean_assert(source !=
nullptr && target !=
nullptr);
// De-interleaving load: val[c] holds channel c of the 8 pixels.
6352 uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
// Source channel 0 initializes both 16-bit accumulators with a widening multiplication.
6354 uint16x8_t intermediateResultsChannel0_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel00_128_u_8x8);
6355 uint16x8_t intermediateResultsChannel1_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel10_128_u_8x8);
// Source channels 1 to 3 are added with widening multiply-accumulate operations.
// NOTE(review): vmlal_u8 does not saturate; the 16-bit sums stay in range as long as the four factors of a target
// channel sum up to at most 257 (255 * 257 = 65535) - presumably callers keep the sum at 128, confirm.
6357 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[1], factorChannel01_128_u_8x8);
6358 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[1], factorChannel11_128_u_8x8);
6360 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[2], factorChannel02_128_u_8x8);
6361 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[2], factorChannel12_128_u_8x8);
6363 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[3], factorChannel03_128_u_8x8);
6364 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[3], factorChannel13_128_u_8x8);
6366 uint8x8x2_t results_u_8x8x2;
// Rounding right shift by 7 bits (normalization by 128) with saturating narrowing to 8 bit, per target channel.
6370 results_u_8x8x2.val[0] = vqrshrn_n_u16(intermediateResultsChannel0_16x8, 7);
6371 results_u_8x8x2.val[1] = vqrshrn_n_u16(intermediateResultsChannel1_16x8, 7);
// Interleaving store: writes the two channels of the 8 pixels alternately.
6374 vst2_u8(target, results_u_8x8x2);
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition FrameChannels.h:51
static bool premultipliedAlphaToStraightAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool zipChannels(const Frames &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool separateTo1Channel(const Frame &sourceFrame, Frames &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool premultipliedAlphaToStraightAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool separateTo1Channel(const Frame &sourceFrame, const std::initializer_list< Frame * > &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool zipChannels(const std::initializer_list< Frame > &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool straightAlphaToPremultipliedAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
static bool straightAlphaToPremultipliedAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
This class implements frame channel conversion, transformation and extraction functions.
Definition FrameChannels.h:31
static void reverseChannelOrder(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Reverses the order of the channels of a frame with zipped pixel format.
Definition FrameChannels.h:2840
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_1024_s_16x8, const __m128i &factorChannel10_1024_s_16x8, const __m128i &factorChannel20_1024_s_16x8, const __m128i &factorChannel01_1024_s_16x8, const __m128i &factorChannel11_1024_s_16x8, const __m128i &factorChannel21_1024_s_16x8, const __m128i &factorChannel02_1024_s_16x8, const __m128i &factorChannel12_1024_s_16x8, const __m128i &factorChannel22_1024_s_16x8, const __m128i &biasChannel0_1024_s_32x4, const __m128i &biasChannel1_1024_s_32x4, const __m128i &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5346
static void addChannelValueRow(const T *source, T *target, const size_t size, const void *channelValueParameter)
Adds a channel to a given row with generic (zipped) pixel format and sets all values to a specified v...
Definition FrameChannels.h:4292
static void shuffleRowChannelsAndSetLastChannelValue(const T *source, T *target, const size_t size, const void *options=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern and sets the last c...
Definition FrameChannels.h:3747
static void separateTo1Channel(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition FrameChannels.h:1847
static OCEAN_FORCE_INLINE void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8, const uint8x16_t &channelValue3_u_8x16)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 4 channels per pixel by a linear combi...
Definition FrameChannels.h:6194
static void addChannelRow(const void **sources, void **targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void *options)
Adds a channel to a given row with generic (zipped) pixel format and copies the information of the ne...
Definition FrameChannels.h:4192
static void shuffleChannelsAndSetLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of source frame and sets the last channel with constant value in the target fra...
Definition FrameChannels.h:3910
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0_128_u_16x8, const __m128i &multiplicationFactors1_128_u_16x8, const __m128i &multiplicationFactors2_128_u_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition FrameChannels.h:5192
static void shuffleChannels(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of a frame by an arbitrary pattern.
Definition FrameChannels.h:3882
static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 3 channels to pixels with one channel by a linear combination of the fo...
Definition FrameChannels.h:4882
static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with one channel by a linear combination of the fo...
Definition FrameChannels.h:4955
static void copyChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Copies one channel from a given frame with zipped pixel format to another frame with zipped pixel for...
Definition FrameChannels.h:2799
static void zipChannels(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition FrameChannels.h:2598
static void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition FrameChannels.h:4095
static void applyRowOperator(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > &rowOperatorFunction, Worker *worker=nullptr)
Applies a row operator to all rows of a source image.
Definition FrameChannels.h:4010
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5773
static void convertRow4ChannelsTo2Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *multiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with two channels by a linear combination of the fo...
static void setChannelSubset(T *frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Sets one channel of a frame with one unique value.
Definition FrameChannels.h:4491
static void applyBivariateOperatorSubset(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Generic bivariate pixel operations.
Definition FrameChannels.h:4726
static void applyAdvancedPixelModifier(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:3972
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition FrameChannels.h:5719
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear com...
Definition FrameChannels.h:5986
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8, const uint8x8_t &factorChannel3_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static void addFirstChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition FrameChannels.h:2711
static void addLastChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the ba...
Definition FrameChannels.h:2731
static void removeFirstChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the first channel from a given frame with zipped (generic) pixel format.
Definition FrameChannels.h:2767
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition FrameChannels.h:5895
static void addLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition FrameChannels.h:2747
static void convertRow3ChannelsTo3Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void reverseRowPixelOrderInPlace(T *data, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general) in place.
Definition FrameChannels.h:3017
static void applyRowOperatorSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows)
Applies a row operator to a subset of all rows of a source image.
Definition FrameChannels.h:4859
static void applyPixelModifier(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:3954
static void narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const size_t size, const void *unusedParameters=nullptr)
Narrows a row of pixels with 16 bit channels to pixels with 8 bit channels.
Definition FrameChannels.h:4133
static void applyAdvancedPixelModifierSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:4619
static void convertRow4ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 4 channels to pixels with 3 channels by a linear combination of the thr...
static void shuffleRowChannels(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern.
Definition FrameChannels.h:3387
static void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition FrameChannels.h:4057
static void convertRow3ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 3 channels per pixel by a linear combi...
Definition FrameChannels.h:6112
static void convertRow3ChannelsTo4Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 4 channels by a linear combination of the thr...
static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
Definition of a constant to specify that the number of channels is not known at compile time but at ...
Definition FrameChannels.h:37
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition FrameChannels.h:5838
static void copyChannelRow(const T *source, T *target, const size_t size, const void *unusedParameters=nullptr)
Copies one channel from a source row to a target row with generic (zipped) pixel format.
Definition FrameChannels.h:4331
static void reverseRowPixelOrder(const T *source, T *target, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general).
Definition FrameChannels.h:2856
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0123_128_s_32x)
Converts 16 pixels with 4 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition FrameChannels.h:5483
static void removeLastChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the last channel from a given frame with zipped (generic) pixel format.
Definition FrameChannels.h:2783
static void transformGeneric(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker)
Transforms a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24,...
Definition FrameChannels.h:4032
static void setChannel(T *frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker *worker=nullptr)
Sets one channel of a frame with a specific unique value.
Definition FrameChannels.h:2821
static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition FrameChannels.h:5119
static void narrow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Narrows 16 bit channels of a frame to 8 bit channels.
Definition FrameChannels.h:3938
static void transformGenericSubset(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const RowReversePixelOrderFunction< void > rowReversePixelOrderFunction, const unsigned int bytesPerRow, const unsigned int sourceStrideBytes, const unsigned int targetStrideBytes, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a frame with generic pixel format (with zipped pixel information) like RGB24 o...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_128_s_16x8, const __m128i &factorChannel10_128_s_16x8, const __m128i &factorChannel20_128_s_16x8, const __m128i &factorChannel01_128_s_16x8, const __m128i &factorChannel11_128_s_16x8, const __m128i &factorChannel21_128_s_16x8, const __m128i &factorChannel02_128_s_16x8, const __m128i &factorChannel12_128_s_16x8, const __m128i &factorChannel22_128_s_16x8, const __m128i &biasChannel0_s_16x8, const __m128i &biasChannel1_s_16x8, const __m128i &biasChannel2_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5258
static void reverseRowChannelOrder(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Reverses/mirrors the order of channels in a given row (or a memory block in general).
Definition FrameChannels.h:3195
static void convertRow3ChannelsTo3Channels8BitPerChannel10BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void applyBivariateOperator(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Generic bivariate pixel operations. Applies bivariate per-pixel operators: C(y, x) = op(A(y,...
Definition FrameChannels.h:3991
static void addFirstChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the fr...
Definition FrameChannels.h:2695
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel00_128_u_8x8, const uint8x8_t &factorChannel10_128_u_8x8, const uint8x8_t &factorChannel01_128_u_8x8, const uint8x8_t &factorChannel11_128_u_8x8, const uint8x8_t &factorChannel02_128_u_8x8, const uint8x8_t &factorChannel12_128_u_8x8, const uint8x8_t &factorChannel03_128_u_8x8, const uint8x8_t &factorChannel13_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with two channels per pixel by a linear combi...
Definition FrameChannels.h:6328
static void separateTo1ChannelRuntime(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition FrameChannels.h:4352
static void zipChannelsRuntime(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition FrameChannels.h:4425
void(*)(const TSource *sourceRow, TTarget *targetRow, const unsigned int width, const unsigned int height, unsigned int rowIndex, const unsigned int sourceStrideElements, const unsigned int targetStrideElements) RowOperatorFunction
Definition of a function pointer to a function able to operate on an entire image row.
Definition FrameChannels.h:43
static void applyPixelModifierSubset(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:4514
static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition FrameChannels.h:5030
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i &multiplicationFactorsChannel1_0123_128_s_16x8)
Converts 16 pixels with 4 channels per pixel to 16 pixels with two channel per pixel by a linear comb...
Definition FrameChannels.h:5546
This is the base class for all frame converter classes.
Definition FrameConverter.h:32
ConversionFlag
Definition of individual conversion flags.
Definition FrameConverter.h:39
@ CONVERT_NORMAL
Normal conversion, neither flips nor mirrors the image.
Definition FrameConverter.h:49
@ CONVERT_FLIPPED_AND_MIRRORED
Rotated conversion, rotates the image by 180.0 degrees with anchor in the center of the image.
Definition FrameConverter.h:82
@ CONVERT_MIRRORED
Mirrored conversion, exchanges left and right of the image (like in a mirror, mirroring around the y-...
Definition FrameConverter.h:71
@ CONVERT_FLIPPED
Flipped conversion, exchanges top and bottom of the image (flipping around the x-axis).
Definition FrameConverter.h:60
static void convertGenericPixelFormat(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const ConversionFlag flag, const RowConversionFunction< TSource, TTarget > rowConversionFunction, const RowReversePixelOrderInPlaceFunction< TTarget > targetReversePixelOrderInPlaceFunction, const bool areContinuous, const void *options, Worker *worker)
Converts a frame with generic pixel format (e.g., RGBA32, BGR24, YUV24, ...) to a frame with generic ...
Definition FrameConverter.h:3483
void(*)(T *row, const size_t width) RowReversePixelOrderInPlaceFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition FrameConverter.h:603
void(*)(const T *inputRow, T *targetRow, const size_t width) RowReversePixelOrderFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition FrameConverter.h:594
static void convertArbitraryPixelFormat(const void **sources, void **targets, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int multipleRowsPerIteration, const MultipleRowsConversionFunction multipleRowsConversionFunction, const void *options, Worker *worker)
Converts a frame with arbitrary pixel format (e.g., Y_UV12, Y_VU12, YUYV16, ...) to a frame with arbi...
Definition FrameConverter.h:3506
static OCEAN_FORCE_INLINE uint8x16_t cast16ElementsNEON(const float32x4_t &sourceA_f_32x4, const float32x4_t &sourceB_f_32x4, const float32x4_t &sourceC_f_32x4, const float32x4_t &sourceD_f_32x4)
Casts 16 float elements to 16 uint8_t elements.
Definition NEON.h:1216
static __m128i divideByRightShiftSigned32Bit(const __m128i &value, const unsigned int rightShifts)
Divides four signed 32 bit values by applying a right shift.
Definition SSE.h:3173
static __m128i load128i(const void *const buffer)
Loads a 128i value from the memory.
Definition SSE.h:3724
static void store128i(const __m128i &value, uint8_t *const buffer)
Stores a 128i value to the memory.
Definition SSE.h:3869
static __m128i divideByRightShiftSigned16Bit(const __m128i &value, const unsigned int rightShifts)
Divides eight int16_t values by applying a right shift.
Definition SSE.h:3104
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8AndAccumulate(const __m128i &values0, const __m128i &values1, __m128i &results0, __m128i &results1)
Multiplies 8 int16_t values with 8 int16_t values and adds the products to 8 int32_t values.
Definition SSE.h:4014
static OCEAN_FORCE_INLINE void interleave3Channel8Bit48Elements(const __m128i &channel0, const __m128i &channel1, const __m128i &channel2, __m128i &interleavedA, __m128i &interleavedB, __m128i &interleavedC)
Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition SSE.h:3410
static OCEAN_FORCE_INLINE void reverseChannelOrder3Channel8Bit48Elements(const __m128i &interleaved0, const __m128i &interleaved1, const __m128i &interleaved2, __m128i &reversedInterleaved0, __m128i &reversedInterleaved1, __m128i &reversedInterleaved2)
Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channe...
Definition SSE.h:3492
static __m128i removeHighBits16_8(const __m128i &value)
Removes the higher 8 bits of eight 16 bit elements.
Definition SSE.h:3904
static OCEAN_FORCE_INLINE void deInterleave3Channel8Bit48Elements(const __m128i &interleavedA, const __m128i &interleavedB, const __m128i &interleavedC, __m128i &channel0, __m128i &channel1, __m128i &channel2)
Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition SSE.h:3369
static __m128i set128i(const unsigned long long high64, const unsigned long long low64)
Sets a 128i value by two 64 bit values.
Definition SSE.h:3875
static OCEAN_FORCE_INLINE void reverseChannelOrder4Channel8Bit64Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (64 elements) of an image with 4 interleaved channels...
Definition SSE.h:3517
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8(const __m128i &values0, const __m128i &values1, __m128i &products0, __m128i &products1)
Multiplies 8 int16_t values with 8 int16_t values and returns the products as 8 int32_t results.
Definition SSE.h:4005
static OCEAN_FORCE_INLINE void reverseChannelOrder2Channel8Bit32Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (32 elements) of an image with 2 interleaved channels...
Definition SSE.h:3477
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition Caller.h:2877
This class implements Ocean's image class.
Definition Frame.h:1879
PixelFormat
Definition of all pixel formats available in the Ocean framework.
Definition Frame.h:183
typename TypeMapperBySize< sizeof(T)>::Type Type
Definition of the mapped data type, selected by the size of T.
Definition DataType.h:508
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
std::vector< Index32 > Indices32
Definition of a vector holding 32 bit index values.
Definition Base.h:96
std::vector< Frame > Frames
Definition of a vector holding frames.
Definition Frame.h:1842
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Default definition of a type with tBytes bytes.
Definition DataType.h:32