8#ifndef META_OCEAN_CV_FRAME_CHANNELS_H
9#define META_OCEAN_CV_FRAME_CHANNELS_H
/// Sentinel value (0) used in this file as the default argument of the `tChannels` template parameter
/// of separateTo1Channel() and zipChannels(); both also take a runtime `channels` argument.
37 static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME = 0u;
/**
 * Function-pointer type of a per-row operator; it is the callback type accepted by applyRowOperator() and applyRowOperatorSubset().
 * The two channel counts are parameters of the alias only, they do not appear in the pointed-to signature.
 * NOTE(review): the parameter meanings below are inferred from their names - confirm against an actual row operator.
 * @tparam TSource Element type of the source row
 * @tparam TTarget Element type of the target row
 * @tparam tSourceChannels Channel count associated with the source
 * @tparam tTargetChannels Channel count associated with the target
 */
42 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
43 using RowOperatorFunction = void(*)(
const TSource* sourceRow, TTarget* targetRow,
const unsigned int width,
const unsigned int height,
unsigned int rowIndex,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements);
/**
 * Splits an interleaved multi-channel frame into one single-channel frame per channel.
 * The explicit uint8_t specializations for 2, 3 and 4 channels in this file show the contract:
 * pixel n of target c receives source element `n * channels + c`; at the end of every row the source
 * advances by `sourceFramePaddingElements` and each target by its own padding value.
 * The generic definition is not part of this view.
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target frames
 * @tparam tChannels Compile-time channel count; defaults to CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
 * @param sourceFrame Interleaved source frame
 * @param targetFrames Array with one target pointer per channel
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param channels Runtime channel count (the visible specializations assert it equals their fixed count)
 * @param sourceFramePaddingElements Elements to skip at the end of each source row
 * @param targetFramesPaddingElements One padding value per target frame; the visible specializations treat nullptr as "no padding"
 */
207 template <
typename TSource,
typename TTarget,
unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
208 static void separateTo1Channel(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements);
/**
 * Overload of separateTo1Channel() that receives the target frames and their paddings as initializer lists.
 * Declaration only; no channel count is passed, so it is presumably taken from the size of `targetFrames` - confirm in the definition.
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target frames
 * @param sourceFrame Source frame
 * @param targetFrames List of target frame pointers
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param sourceFramePaddingElements Padding of the source frame, in elements
 * @param targetFramesPaddingElements List of padding values for the target frames (whether it may be empty is not visible here)
 */
236 template <
typename TSource,
typename TTarget>
237 static void separateTo1Channel(
const TSource*
const sourceFrame,
const std::initializer_list<TTarget*>& targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int sourceFramePaddingElements,
const std::initializer_list<const unsigned int>& targetFramesPaddingElements);
/**
 * Declaration only; the signature mirrors separateTo1Channel() with source and target swapped, so this
 * presumably interleaves several single-channel source frames into one multi-channel target frame - confirm in the definition.
 * @tparam TSource Element type of the source frames
 * @tparam TTarget Element type of the target frame
 * @tparam tChannels Compile-time channel count; defaults to CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
 * @param sourceFrames Array of source frame pointers
 * @param targetFrame Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param channels Runtime channel count
 * @param sourceFramesPaddingElements Array of padding values for the source frames (nullptr handling is not visible here)
 * @param targetFramePaddingElements Padding of the target frame, in elements
 */
265 template <
typename TSource,
typename TTarget,
unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
266 static void zipChannels(
const TSource*
const*
const sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
/**
 * Overload of zipChannels() that receives the source frames and their paddings as initializer lists.
 * Declaration only; no channel count is passed, so it is presumably taken from the size of `sourceFrames` - confirm in the definition.
 * @tparam TSource Element type of the source frames
 * @tparam TTarget Element type of the target frame
 * @param sourceFrames List of source frame pointers
 * @param targetFrame Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param sourceFramesPaddingElements List of padding values for the source frames
 * @param targetFramePaddingElements Padding of the target frame, in elements
 */
294 template <
typename TSource,
typename TTarget>
295 static void zipChannels(
const std::initializer_list<const TSource*>& sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const std::initializer_list<unsigned int>& sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
/**
 * Declaration only; by its name this adds the data of `sourceNewChannel` as a new first channel in front of the
 * channels of `source` and writes the result to `target` - confirm in the definition.
 * @tparam T Element type shared by all three buffers
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @param source Source frame
 * @param sourceNewChannel Frame providing the additional channel
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param sourceNewChannelPaddingElements Padding of `sourceNewChannel`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param worker Optional Worker, defaults to nullptr (presumably used to distribute the work - confirm)
 */
312 template <
typename T,
unsigned int tSourceChannels>
313 static inline void addFirstChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; by its name this adds a new first channel filled with the constant `newChannelValue`
 * in front of the channels of `source` - confirm in the definition.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @param source Source frame
 * @param newChannelValue Value for the additional channel
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
329 template <
typename T,
unsigned int tSourceChannels>
330 static inline void addFirstChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; by its name this appends the data of `sourceNewChannel` as a new last channel behind the
 * channels of `source` and writes the result to `target` - confirm in the definition.
 * @tparam T Element type shared by all three buffers
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @param source Source frame
 * @param sourceNewChannel Frame providing the additional channel
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param sourceNewChannelPaddingElements Padding of `sourceNewChannel`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
347 template <
typename T,
unsigned int tSourceChannels>
348 static inline void addLastChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; by its name this appends a new last channel filled with the constant `newChannelValue`
 * behind the channels of `source` - confirm in the definition.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @param source Source frame
 * @param newChannelValue Value for the additional channel
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
364 template <
typename T,
unsigned int tSourceChannels>
365 static inline void addLastChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; by its name this copies `source` to `target` without the first channel - confirm in the definition.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
382 template <
typename T,
unsigned int tSourceChannels>
383 static inline void removeFirstChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; by its name this copies `source` to `target` without the last channel - confirm in the definition.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
400 template <
typename T,
unsigned int tSourceChannels>
401 static inline void removeLastChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; by its name and template parameters this copies channel `tSourceChannelIndex` of `source`
 * into channel `tTargetChannelIndex` of `target` - confirm in the definition. This signature has no ConversionFlag.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @tparam tTargetChannels Compile-time channel count of `target`
 * @tparam tSourceChannelIndex Index of the channel to read
 * @tparam tTargetChannelIndex Index of the channel to write
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
418 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
419 static inline void copyChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; by its name this writes `value` into channel `tChannel` of every pixel of `frame` (in place) - confirm in the definition.
 * @tparam T Element type of the frame
 * @tparam tChannel Index of the channel to set
 * @tparam tChannels Compile-time channel count of the frame
 * @param frame Frame to modify
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param value Value to assign
 * @param framePaddingElements Padding of the frame, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
433 template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
434 static inline void setChannel(T* frame,
const unsigned int width,
const unsigned int height,
const T value,
const unsigned int framePaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; by its name this writes `source` to `target` with the channels of each pixel in reversed order - confirm in the definition.
 * @tparam T Element type of source and target
 * @tparam tChannels Compile-time channel count
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
450 template <
typename T,
unsigned int tChannels>
451 static inline void reverseChannelOrder(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; by its name this re-orders the channels of `source` into `target` according to `tShufflePattern`.
 * NOTE(review): the encoding of `tShufflePattern` is not visible in this view - look it up in the definition before use.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @tparam tTargetChannels Compile-time channel count of `target`
 * @tparam tShufflePattern Compile-time shuffle pattern
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
477 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
478 static inline void shuffleChannels(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; by its name this shuffles channels like shuffleChannels() and additionally sets the last target
 * channel to `newChannelValue` - confirm in the definition.
 * NOTE(review): the encoding of `tShufflePattern` is not visible in this view.
 * @tparam T Element type of source and target
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @tparam tTargetChannels Compile-time channel count of `target`
 * @tparam tShufflePattern Compile-time shuffle pattern
 * @param source Source frame
 * @param newChannelValue Value for the last target channel
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
505 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
506 static inline void shuffleChannelsAndSetLastChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; converts a frame with uint16_t elements into a frame with uint8_t elements.
 * NOTE(review): how a 16-bit value is reduced to 8 bits (shift, rounding, clamping) is not visible here - confirm in the definition.
 * @tparam tChannels Compile-time channel count
 * @param source Source frame with 16-bit elements
 * @param target Target frame with 8-bit elements
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
520 template <
unsigned int tChannels>
521 static inline void narrow16BitPerChannelTo8BitPerChannel(
const uint16_t* source, uint8_t* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; by its name this applies the compile-time function `tPixelFunction` to the pixels of `source`,
 * writing to `target` - confirm in the definition. Note that this signature has no padding parameters.
 * @tparam T Element type of source and target
 * @tparam tChannels Compile-time channel count
 * @tparam tPixelFunction Function receiving a const source pointer and a target pointer
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param worker Optional Worker, defaults to nullptr
 */
535 template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
536 static void applyPixelModifier(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
Worker* worker =
nullptr);
/**
 * Declaration only; compared with applyPixelModifier() this signature allows different source/target element types and
 * channel counts and takes explicit paddings. Presumably applies `tPixelFunction` per pixel - confirm in the definition.
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target frame
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @tparam tTargetChannels Compile-time channel count of `target`
 * @tparam tPixelFunction Function receiving a const source pointer and a target pointer
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param worker Optional Worker, defaults to nullptr
 */
554 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
555 static void applyAdvancedPixelModifier(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
Worker* worker =
nullptr);
/**
 * Declaration only; by its name this combines two source frames into one target frame through the compile-time
 * function `tOperator` - confirm in the definition.
 * NOTE(review): `TIntermediate` does not appear in this signature or in the type of `tOperator`; its role is not visible here.
 * @tparam TSource0 Element type of the first source
 * @tparam TSource1 Element type of the second source
 * @tparam TTarget Element type of the target
 * @tparam TIntermediate See the note above
 * @tparam tSourceChannels Compile-time channel count of the sources
 * @tparam tTargetChannels Compile-time channel count of the target
 * @tparam tOperator Function receiving two const source pointers and a target pointer
 * @param source0 First source frame
 * @param source1 Second source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param source0PaddingElements Padding of `source0`, in elements
 * @param source1PaddingElements Padding of `source1`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param worker Optional Worker, defaults to nullptr
 */
578 template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
579 static void applyBivariateOperator(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
Worker* worker =
nullptr);
/**
 * Declaration only; by its name this runs a RowOperatorFunction over the rows of a frame - confirm in the definition.
 * This entry point takes padding values, whereas RowOperatorFunction and applyRowOperatorSubset() take stride values;
 * the conversion between the two is presumably done in the definition.
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target frame
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @tparam tTargetChannels Compile-time channel count of `target`
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param rowOperatorFunction Row operator to invoke, passed by const reference
 * @param worker Optional Worker, defaults to nullptr
 */
598 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
599 static void applyRowOperator(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction,
Worker* worker =
nullptr);
/**
 * Declaration only; presumably copies `source` to `target` while applying the transformation selected by
 * `conversionFlag` - confirm in the definition.
 * Unlike most functions declared here, `worker` has no default argument and must be passed explicitly.
 * @tparam T Element type of source and target
 * @tparam tChannels Compile-time channel count
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param worker Worker pointer (whether nullptr is accepted is not visible here)
 */
615 template <
typename T,
unsigned int tChannels>
616 static inline void transformGeneric(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker);
/**
 * In-place overload (single non-const frame pointer). Declaration only; by its name this converts 8-bit pixels from
 * premultiplied alpha to straight alpha - the exact arithmetic (rounding, alpha == 0) is not visible here.
 * @tparam tChannels Compile-time channel count
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param frame Frame to convert
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param framePaddingElements Padding of the frame, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
629 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
630 static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t*
const frame,
const unsigned int width,
const unsigned int height,
const unsigned int framePaddingElements,
Worker* worker =
nullptr);
/**
 * Source/target overload. Declaration only; by its name this converts 8-bit pixels from premultiplied alpha to
 * straight alpha, reading `source` and writing `target` - the exact arithmetic is not visible here.
 * @tparam tChannels Compile-time channel count
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
645 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
646 static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * In-place overload (single non-const frame pointer). Declaration only; by its name this converts 8-bit pixels from
 * straight alpha to premultiplied alpha - the exact arithmetic (rounding) is not visible here.
 * @tparam tChannels Compile-time channel count
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param frame Frame to convert
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param framePaddingElements Padding of the frame, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
659 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
660 static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t*
const frame,
const unsigned int width,
const unsigned int height,
const unsigned int framePaddingElements,
Worker* worker =
nullptr);
/**
 * Source/target overload. Declaration only; by its name this converts 8-bit pixels from straight alpha to
 * premultiplied alpha, reading `source` and writing `target` - the exact arithmetic is not visible here.
 * @tparam tChannels Compile-time channel count
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param worker Optional Worker, defaults to nullptr
 */
675 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
676 static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
/**
 * Declaration only; by its name this writes the pixels of one row to `target` in reversed order - confirm in the definition.
 * @tparam T Element type
 * @tparam tChannels Compile-time channel count
 * @param source Source row
 * @param target Target row
 * @param size Row length (presumably in pixels rather than elements - confirm)
 */
686 template <
typename T,
unsigned int tChannels>
687 static void reverseRowPixelOrder(
const T* source, T* target,
const size_t size);
/**
 * Declaration only; in-place counterpart of reverseRowPixelOrder() operating on a single buffer - confirm in the definition.
 * @tparam T Element type
 * @tparam tChannels Compile-time channel count
 * @param data Row to reverse
 * @param size Row length (presumably in pixels rather than elements - confirm)
 */
696 template <
typename T,
unsigned int tChannels>
697 static void reverseRowPixelOrderInPlace(T* data,
const size_t size);
/**
 * Declaration only; row-level function that, by its name, reverses the channel order of each pixel in one row.
 * @tparam T Element type
 * @tparam tChannels Compile-time channel count
 * @param source Source row
 * @param target Target row
 * @param size Row length (presumably in pixels - confirm)
 * @param unusedOptions Untyped options pointer, defaults to nullptr; the name indicates it is not used
 */
708 template <
typename T,
unsigned int tChannels>
709 static void reverseRowChannelOrder(
const T* source, T* target,
const size_t size,
const void* unusedOptions =
nullptr);
/**
 * Declaration only; row-level function that, by its name, shuffles the channels of each pixel in one row.
 * NOTE(review): the encoding of `tShufflePattern` is not visible in this view.
 * @tparam T Element type
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @tparam tTargetChannels Compile-time channel count of `target`
 * @tparam tShufflePattern Compile-time shuffle pattern
 * @param source Source row
 * @param target Target row
 * @param size Row length (presumably in pixels - confirm)
 * @param unusedOptions Untyped options pointer, defaults to nullptr; the name indicates it is not used
 */
731 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
732 static inline void shuffleRowChannels(
const T* source, T* target,
const size_t size,
const void* unusedOptions =
nullptr);
/**
 * Declaration only; row-level function that, by its name, shuffles channels and sets the last target channel.
 * NOTE(review): there is no value parameter here, so the channel value is presumably carried by `options`;
 * what `options` points to, and whether the nullptr default is valid, is not visible in this view.
 * @tparam T Element type
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @tparam tTargetChannels Compile-time channel count of `target`
 * @tparam tShufflePattern Compile-time shuffle pattern
 * @param source Source row
 * @param target Target row
 * @param size Row length (presumably in pixels - confirm)
 * @param options Untyped options pointer, defaults to nullptr
 */
754 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
755 static inline void shuffleRowChannelsAndSetLastChannelValue(
const T* source, T* target,
const size_t size,
const void* options =
nullptr);
/**
 * Declaration only; row-level conversion of 8-bit 3-channel pixels to 8-bit 1-channel pixels.
 * NOTE(review): the name and the `_128` suffix suggest fixed-point weights with denominator 128 (7 bits); the pointee
 * type and layout behind `channelMultiplicationFactors_128` are not visible here - confirm in the definition.
 * @tparam tUseFactorChannel0 Flag for channel 0 (presumably whether its factor contributes - confirm)
 * @tparam tUseFactorChannel1 Flag for channel 1
 * @tparam tUseFactorChannel2 Flag for channel 2
 * @param source Source row
 * @param target Target row
 * @param size Row length (presumably in pixels - confirm)
 * @param channelMultiplicationFactors_128 Untyped pointer to the multiplication factors
 */
770 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
771 static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(
const uint8_t* source, uint8_t* target,
const size_t size,
const void* channelMultiplicationFactors_128);
/**
 * Declaration only; row-level conversion of 8-bit 4-channel pixels to 8-bit 1-channel pixels.
 * NOTE(review): the name and the `_128` suffix suggest fixed-point weights with denominator 128 (7 bits); the pointee
 * type and layout behind `channelMultiplicationFactors_128` are not visible here - confirm in the definition.
 * @tparam tUseFactorChannel0 Flag for channel 0 (presumably whether its factor contributes - confirm)
 * @tparam tUseFactorChannel1 Flag for channel 1
 * @tparam tUseFactorChannel2 Flag for channel 2
 * @tparam tUseFactorChannel3 Flag for channel 3
 * @param source Source row
 * @param target Target row
 * @param size Row length (presumably in pixels - confirm)
 * @param channelMultiplicationFactors_128 Untyped pointer to the multiplication factors
 */
872 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
873 static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(
const uint8_t* source, uint8_t* target,
const size_t size,
const void* channelMultiplicationFactors_128);
/**
 * Declaration only; row-level conversion from uint16_t elements to uint8_t elements.
 * NOTE(review): how a 16-bit value is reduced to 8 bits is not visible here - confirm in the definition.
 * @tparam tChannels Compile-time channel count
 * @param source Source row with 16-bit elements
 * @param target Target row with 8-bit elements
 * @param size Row length (presumably in pixels - confirm)
 * @param unusedParameters Untyped pointer, defaults to nullptr; the name indicates it is not used
 */
920 template <
unsigned int tChannels>
921 static void narrowRow16BitPerChannelTo8BitPerChannel(
const uint16_t* source, uint8_t* target,
const size_t size,
const void* unusedParameters =
nullptr);
/**
 * Declaration only; by its name and `tAddToFront` this is the row-level worker behind addFirstChannel()/addLastChannel().
 * NOTE(review): `sources`, `targets` and `options` are untyped pointer arrays/pointers; how many entries they hold
 * and what `options` carries is not visible in this view - confirm in the definition.
 * @tparam T Element type
 * @tparam tSourceChannels Compile-time channel count of the source
 * @tparam tAddToFront Selects front vs. back insertion (presumably true == new channel first)
 * @param sources Array of untyped source pointers
 * @param targets Array of untyped target pointers
 * @param multipleRowIndex Row index to process
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param options Untyped options pointer
 */
937 template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
938 static void addChannelRow(
const void** sources,
void** targets,
const unsigned int multipleRowIndex,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const void* options);
/**
 * Declaration only; by its name and `tAddToFront` this is the row-level worker behind
 * addFirstChannelValue()/addLastChannelValue().
 * NOTE(review): the channel value is passed through the untyped `channelValueParameter`; presumably it points to a
 * value of type T - confirm in the definition.
 * @tparam T Element type
 * @tparam tSourceChannels Compile-time channel count of the source
 * @tparam tAddToFront Selects front vs. back insertion (presumably true == new channel first)
 * @param source Source row
 * @param target Target row
 * @param size Row length (presumably in pixels - confirm)
 * @param channelValueParameter Untyped pointer carrying the channel value
 */
951 template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
952 static void addChannelValueRow(
const T* source, T* target,
const size_t size,
const void* channelValueParameter);
/**
 * Declaration only; row-level counterpart of copyChannel() with the same five template parameters.
 * @tparam T Element type
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @tparam tTargetChannels Compile-time channel count of `target`
 * @tparam tSourceChannelIndex Index of the channel to read
 * @tparam tTargetChannelIndex Index of the channel to write
 * @param source Source row
 * @param target Target row
 * @param size Row length (presumably in pixels - confirm)
 * @param unusedParameters Untyped pointer, defaults to nullptr; the name indicates it is not used
 */
966 template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
967 static void copyChannelRow(
const T* source, T* target,
const size_t size,
const void* unusedParameters =
nullptr);
/**
 * Declaration only; same parameter list as separateTo1Channel() but without a compile-time channel count, so this is
 * presumably the path taken when the channel count is only known at runtime - confirm in the definition.
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target frames
 * @param sourceFrame Source frame
 * @param targetFrames Array of target frame pointers
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param channels Runtime channel count
 * @param sourceFramePaddingElements Padding of the source frame, in elements
 * @param targetFramesPaddingElements Array of padding values for the target frames
 */
983 template <
typename TSource,
typename TTarget>
984 static void separateTo1ChannelRuntime(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements);
/**
 * Declaration only; same parameter list as zipChannels() but without a compile-time channel count, so this is
 * presumably the path taken when the channel count is only known at runtime - confirm in the definition.
 * @tparam TSource Element type of the source frames
 * @tparam TTarget Element type of the target frame
 * @param sourceFrames Array of source frame pointers
 * @param targetFrame Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param channels Runtime channel count
 * @param sourceFramesPaddingElements Array of padding values for the source frames
 * @param targetFramePaddingElements Padding of the target frame, in elements
 */
998 template <
typename TSource,
typename TTarget>
999 static void zipChannelsRuntime(
const TSource*
const*
const sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements);
/**
 * Declaration only; row-range variant of setChannel(). It takes no `height`; the rows to handle are given by
 * `firstRow` and `numberRows` (presumably the range [firstRow, firstRow + numberRows) - confirm).
 * @tparam T Element type of the frame
 * @tparam tChannel Index of the channel to set
 * @tparam tChannels Compile-time channel count of the frame
 * @param frame Frame to modify
 * @param width Frame width in pixels
 * @param value Value to assign
 * @param framePaddingElements Padding of the frame, in elements
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1013 template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
1014 static void setChannelSubset(T* frame,
const unsigned int width,
const T value,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; row-range variant of applyPixelModifier(): same parameters with `worker` replaced by
 * `firstRow`/`numberRows`. Like applyPixelModifier(), it has no padding parameters.
 * @tparam T Element type of source and target
 * @tparam tChannels Compile-time channel count
 * @tparam tPixelFunction Function receiving a const source pointer and a target pointer
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1029 template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
1030 static void applyPixelModifierSubset(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; row-range variant of applyAdvancedPixelModifier(): same parameters with `worker` replaced by
 * `firstRow`/`numberRows`.
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target frame
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @tparam tTargetChannels Compile-time channel count of `target`
 * @tparam tPixelFunction Function receiving a const source pointer and a target pointer
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1049 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
1050 static void applyAdvancedPixelModifierSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; row-range variant of applyBivariateOperator(): same parameters with `worker` replaced by
 * `firstRow`/`numberRows`.
 * NOTE(review): `TIntermediate` does not appear in this signature or in the type of `tOperator`; its role is not visible here.
 * @tparam TSource0 Element type of the first source
 * @tparam TSource1 Element type of the second source
 * @tparam TTarget Element type of the target
 * @tparam TIntermediate See the note above
 * @tparam tSourceChannels Compile-time channel count of the sources
 * @tparam tTargetChannels Compile-time channel count of the target
 * @tparam tOperator Function receiving two const source pointers and a target pointer
 * @param source0 First source frame
 * @param source1 Second source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param source0PaddingElements Padding of `source0`, in elements
 * @param source1PaddingElements Padding of `source1`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1073 template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
1074 static void applyBivariateOperatorSubset(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; row-range variant of applyRowOperator(). In contrast to applyRowOperator() it receives
 * stride values (not padding values) and takes the row operator by value.
 * @tparam TSource Element type of the source frame
 * @tparam TTarget Element type of the target frame
 * @tparam tSourceChannels Compile-time channel count of `source`
 * @tparam tTargetChannels Compile-time channel count of `target`
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param sourceStrideElements Stride of `source`, in elements
 * @param targetStrideElements Stride of `target`, in elements
 * @param rowOperatorFunction Row operator to invoke
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1094 template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
1095 static void applyRowOperatorSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; non-template, byte-based row-range function, presumably the worker behind transformGeneric().
 * All sizes are given in bytes here, not in elements.
 * NOTE(review): RowReversePixelOrderFunction is declared outside this view; by its name it is the callback used to
 * reverse the pixel order of a row when `conversionFlag` requires it - confirm in the definition.
 * @param source Source frame as raw bytes
 * @param target Target frame as raw bytes
 * @param width Frame width in pixels
 * @param height Frame height in pixels
 * @param conversionFlag Conversion flag; ConversionFlag is declared outside this view
 * @param rowReversePixelOrderFunction Callback for reversing a row
 * @param bytesPerRow Number of bytes per row
 * @param sourceStrideBytes Stride of `source`, in bytes
 * @param targetStrideBytes Stride of `target`, in bytes
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1111 static void transformGenericSubset(
const uint8_t* source, uint8_t* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const RowReversePixelOrderFunction<void> rowReversePixelOrderFunction,
const unsigned int bytesPerRow,
const unsigned int sourceStrideBytes,
const unsigned int targetStrideBytes,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; row-range variant of the in-place premultipliedAlphaToStraightAlpha8BitPerChannel() overload.
 * It takes no `height`; the rows to handle are given by `firstRow` and `numberRows`.
 * @tparam tChannels Compile-time channel count
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param frame Frame to convert
 * @param width Frame width in pixels
 * @param framePaddingElements Padding of the frame, in elements
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1123 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1124 static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t*
const frame,
const unsigned int width,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; row-range variant of the source/target premultipliedAlphaToStraightAlpha8BitPerChannel() overload.
 * It takes no `height`; the rows to handle are given by `firstRow` and `numberRows`.
 * @tparam tChannels Compile-time channel count
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1138 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1139 static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; row-range variant of the in-place straightAlphaToPremultipliedAlpha8BitPerChannel() overload.
 * It takes no `height`; the rows to handle are given by `firstRow` and `numberRows`.
 * @tparam tChannels Compile-time channel count
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param frame Frame to convert
 * @param width Frame width in pixels
 * @param framePaddingElements Padding of the frame, in elements
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1151 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1152 static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t*
const frame,
const unsigned int width,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
/**
 * Declaration only; row-range variant of the source/target straightAlphaToPremultipliedAlpha8BitPerChannel() overload.
 * It takes no `height`; the rows to handle are given by `firstRow` and `numberRows`.
 * @tparam tChannels Compile-time channel count
 * @tparam tAlphaChannelIndex Index of the alpha channel
 * @param source Source frame
 * @param target Target frame
 * @param width Frame width in pixels
 * @param sourcePaddingElements Padding of `source`, in elements
 * @param targetPaddingElements Padding of `target`, in elements
 * @param firstRow First row to handle
 * @param numberRows Number of rows to handle
 */
1166 template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
1167 static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(
const uint8_t*
const source, uint8_t*
const target,
const unsigned int width,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
1169#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
/**
 * SSE helper, declared only when OCEAN_HARDWARE_SSE_VERSION >= 41 (see the preceding #if). Declaration only.
 * By its name it converts 16 pixels with 3 channels (8 bit each) to 16 pixels with 1 channel using 7-bit precision.
 * NOTE(review): the parameter suffixes appear to encode scale (`_128`), signedness (`u`) and lane layout (`16x8`);
 * this convention is inferred from the names only - confirm in the definition.
 * @param source Source pixels
 * @param target Target pixels
 * @param multiplicationFactors0_128_u_16x8 Factor register for channel 0
 * @param multiplicationFactors1_128_u_16x8 Factor register for channel 1
 * @param multiplicationFactors2_128_u_16x8 Factor register for channel 2
 */
1182 static OCEAN_FORCE_INLINE
void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& multiplicationFactors0_128_u_16x8,
const __m128i& multiplicationFactors1_128_u_16x8,
const __m128i& multiplicationFactors2_128_u_16x8);
/**
 * SSE helper. Declaration only; by its name it converts 16 pixels with 3 channels to 16 pixels with 3 channels
 * (8 bit each) using 7-bit precision, with nine factor registers and three bias registers.
 * NOTE(review): which digit of `factorChannelXY` is the source and which the target channel, and whether the bias is
 * applied before or after the multiplication, is not visible in this view - confirm in the definition.
 * @param source Source pixels
 * @param target Target pixels
 * @param factorChannel00_128_s_16x8 Factor register (see note)
 * @param factorChannel10_128_s_16x8 Factor register (see note)
 * @param factorChannel20_128_s_16x8 Factor register (see note)
 * @param factorChannel01_128_s_16x8 Factor register (see note)
 * @param factorChannel11_128_s_16x8 Factor register (see note)
 * @param factorChannel21_128_s_16x8 Factor register (see note)
 * @param factorChannel02_128_s_16x8 Factor register (see note)
 * @param factorChannel12_128_s_16x8 Factor register (see note)
 * @param factorChannel22_128_s_16x8 Factor register (see note)
 * @param biasChannel0_s_16x8 Bias register for channel 0
 * @param biasChannel1_s_16x8 Bias register for channel 1
 * @param biasChannel2_s_16x8 Bias register for channel 2
 */
1211 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_128_s_16x8,
const __m128i& factorChannel10_128_s_16x8,
const __m128i& factorChannel20_128_s_16x8,
const __m128i& factorChannel01_128_s_16x8,
const __m128i& factorChannel11_128_s_16x8,
const __m128i& factorChannel21_128_s_16x8,
const __m128i& factorChannel02_128_s_16x8,
const __m128i& factorChannel12_128_s_16x8,
const __m128i& factorChannel22_128_s_16x8,
const __m128i& biasChannel0_s_16x8,
const __m128i& biasChannel1_s_16x8,
const __m128i& biasChannel2_s_16x8);
/**
 * SSE helper. Declaration only; by its name it converts 16 pixels with 3 channels to 16 pixels with 3 channels
 * (8 bit each) using 10-bit precision. The factor names carry `_1024_s_16x8`, the bias names `_1024_s_32x4`.
 * NOTE(review): the index convention of `factorChannelXY` and the point at which the bias is applied are not
 * visible in this view - confirm in the definition.
 * @param source Source pixels
 * @param target Target pixels
 * @param factorChannel00_1024_s_16x8 Factor register (see note)
 * @param factorChannel10_1024_s_16x8 Factor register (see note)
 * @param factorChannel20_1024_s_16x8 Factor register (see note)
 * @param factorChannel01_1024_s_16x8 Factor register (see note)
 * @param factorChannel11_1024_s_16x8 Factor register (see note)
 * @param factorChannel21_1024_s_16x8 Factor register (see note)
 * @param factorChannel02_1024_s_16x8 Factor register (see note)
 * @param factorChannel12_1024_s_16x8 Factor register (see note)
 * @param factorChannel22_1024_s_16x8 Factor register (see note)
 * @param biasChannel0_1024_s_32x4 Bias register for channel 0
 * @param biasChannel1_1024_s_32x4 Bias register for channel 1
 * @param biasChannel2_1024_s_32x4 Bias register for channel 2
 */
1240 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_1024_s_16x8,
const __m128i& factorChannel10_1024_s_16x8,
const __m128i& factorChannel20_1024_s_16x8,
const __m128i& factorChannel01_1024_s_16x8,
const __m128i& factorChannel11_1024_s_16x8,
const __m128i& factorChannel21_1024_s_16x8,
const __m128i& factorChannel02_1024_s_16x8,
const __m128i& factorChannel12_1024_s_16x8,
const __m128i& factorChannel22_1024_s_16x8,
const __m128i& biasChannel0_1024_s_32x4,
const __m128i& biasChannel1_1024_s_32x4,
const __m128i& biasChannel2_1024_s_32x4);
/**
 * SSE helper. Declaration only; by its name it converts 16 pixels with 4 channels (8 bit each) to 16 pixels with
 * 1 channel using 7-bit precision, with all four factors packed into a single register.
 * NOTE(review): the parameter name ends in `_s_32x` without a lane count, unlike the sibling declarations - this looks
 * truncated; the lane layout is therefore not visible here.
 * @param source Source pixels
 * @param target Target pixels
 * @param multiplicationFactors0123_128_s_32x Register holding the factors of channels 0-3
 */
1251 static OCEAN_FORCE_INLINE
void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& multiplicationFactors0123_128_s_32x);
/**
 * SSE helper. Declaration only; by its name it converts 16 pixels with 4 channels (8 bit each) to 16 pixels with
 * 2 channels using 7-bit precision, with one factor register per target channel.
 * NOTE(review): the packing of the four source-channel factors inside each register is not visible here.
 * @param source Source pixels
 * @param target Target pixels
 * @param multiplicationFactorsChannel0_0123_128_s_16x8 Factors producing target channel 0
 * @param multiplicationFactorsChannel1_0123_128_s_16x8 Factors producing target channel 1
 */
1263 static OCEAN_FORCE_INLINE
void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& multiplicationFactorsChannel0_0123_128_s_16x8,
const __m128i& multiplicationFactorsChannel1_0123_128_s_16x8);
1267#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
/**
 * NEON helper, declared only when OCEAN_HARDWARE_NEON_VERSION >= 10 (see the preceding #if). Declaration only.
 * By its name it converts 8 pixels with 3 channels to 8 pixels with 3 channels (8 bit each) using 6-bit precision.
 * NOTE(review): the template header below (three `tUseFactorChannel*` flags) is not referenced by this signature, and
 * the embedded original line numbers jump from 1283 to 1313 - a declaration that owned this header is probably
 * missing from this view. Verify against the original file before relying on these template parameters.
 * NOTE(review): the index convention of `factorChannelXY` is not visible here.
 * @param source Source pixels
 * @param target Target pixels
 * @param factorChannel00_64_s_16x8 ... factorChannel22_64_s_16x8 Nine factor registers
 * @param biasChannel0_u_8x8 Bias register for channel 0
 * @param biasChannel1_u_8x8 Bias register for channel 1
 * @param biasChannel2_u_8x8 Bias register for channel 2
 */
1283 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
1313 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8);
/**
 * NEON helper. Declaration only; by its name it converts 16 pixels with 3 channels to 16 pixels with 3 channels
 * (8 bit each) using 6-bit precision. Same parameter list as the 8-pixel variant.
 * NOTE(review): the index convention of `factorChannelXY` and the point at which the bias is applied are not
 * visible in this view - confirm in the definition.
 * @param source Source pixels
 * @param target Target pixels
 * @param factorChannel00_64_s_16x8 ... factorChannel22_64_s_16x8 Nine factor registers
 * @param biasChannel0_u_8x8 Bias register for channel 0
 * @param biasChannel1_u_8x8 Bias register for channel 1
 * @param biasChannel2_u_8x8 Bias register for channel 2
 */
1342 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8);
/**
 * NEON helper. Declaration only; by its name it converts 8 pixels with 3 channels to 8 pixels with 3 channels
 * (8 bit each) using 7-bit precision. Unlike the 6-bit variants, the bias registers are int16x8_t and carry `_128`.
 * NOTE(review): the index convention of `factorChannelXY` and the point at which the bias is applied are not
 * visible in this view - confirm in the definition.
 * @param source Source pixels
 * @param target Target pixels
 * @param factorChannel00_128_s_16x8 ... factorChannel22_128_s_16x8 Nine factor registers
 * @param biasChannel0_128_s_16x8 Bias register for channel 0
 * @param biasChannel1_128_s_16x8 Bias register for channel 1
 * @param biasChannel2_128_s_16x8 Bias register for channel 2
 */
1371 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8);
/**
 * NEON helper. Declaration only; by its name it converts 8 pixels with 3 channels to 8 pixels with 3 channels
 * (8 bit each) using 10-bit precision. Factors are int16x4_t, biases are int32x4_t.
 * NOTE(review): the index convention of `factorChannelXY` and the point at which the bias is applied are not
 * visible in this view - confirm in the definition.
 * @param source Source pixels
 * @param target Target pixels
 * @param factorChannel00_1024_s_16x4 ... factorChannel22_1024_s_16x4 Nine factor registers
 * @param biasChannel0_1024_s_32x4 Bias register for channel 0
 * @param biasChannel1_1024_s_32x4 Bias register for channel 1
 * @param biasChannel2_1024_s_32x4 Bias register for channel 2
 */
1400 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4);
/**
 * NEON helper. Declaration only; by its name it converts 16 pixels with 3 channels to 16 pixels with 3 channels
 * (8 bit each) using 10-bit precision. Same parameter list as the 8-pixel 10-bit variant.
 * NOTE(review): the index convention of `factorChannelXY` and the point at which the bias is applied are not
 * visible in this view - confirm in the definition.
 * @param source Source pixels
 * @param target Target pixels
 * @param factorChannel00_1024_s_16x4 ... factorChannel22_1024_s_16x4 Nine factor registers
 * @param biasChannel0_1024_s_32x4 Bias register for channel 0
 * @param biasChannel1_1024_s_32x4 Bias register for channel 1
 * @param biasChannel2_1024_s_32x4 Bias register for channel 2
 */
1429 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4);
/**
 * NEON helper. Declaration only; by its name it converts 16 pixels with 3 channels to 16 pixels with 3 channels
 * (8 bit each) using 7-bit precision. Same parameter list as the 8-pixel 7-bit variant.
 * NOTE(review): the index convention of `factorChannelXY` and the point at which the bias is applied are not
 * visible in this view - confirm in the definition.
 * @param source Source pixels
 * @param target Target pixels
 * @param factorChannel00_128_s_16x8 ... factorChannel22_128_s_16x8 Nine factor registers
 * @param biasChannel0_128_s_16x8 Bias register for channel 0
 * @param biasChannel1_128_s_16x8 Bias register for channel 1
 * @param biasChannel2_128_s_16x8 Bias register for channel 2
 */
1458 static OCEAN_FORCE_INLINE
void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8);
/**
 * NEON helper. Declaration only; by its name it converts 16 pixels with 3 channels to 16 pixels with 4 channels
 * (8 bit each) using 6-bit precision. In addition to the nine factors and three biases of the 3-to-3 variant it
 * receives `channelValue3_u_8x16`, presumably the constant written to the fourth target channel - confirm.
 * NOTE(review): the index convention of `factorChannelXY` is not visible in this view.
 * @param source Source pixels
 * @param target Target pixels
 * @param factorChannel00_64_s_16x8 ... factorChannel22_64_s_16x8 Nine factor registers
 * @param biasChannel0_u_8x8 Bias register for channel 0
 * @param biasChannel1_u_8x8 Bias register for channel 1
 * @param biasChannel2_u_8x8 Bias register for channel 2
 * @param channelValue3_u_8x16 Register with the value for channel 3
 */
1490 static OCEAN_FORCE_INLINE
void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8,
const uint8x16_t& channelValue3_u_8x16);
/**
 * NEON helper. Declaration only; by its name it converts 8 pixels with 4 channels to 8 pixels with 2 channels
 * (8 bit each) using 7-bit precision, with eight uint8x8_t factor registers.
 * NOTE(review): the template header below (four `tUseFactorChannel*` flags) is not referenced by this signature, and
 * the embedded original line numbers jump from 1508 to 1527 - a declaration that owned this header is probably
 * missing from this view. Verify against the original file before relying on these template parameters.
 * NOTE(review): the index convention of `factorChannelXY` is not visible here.
 * @param source Source pixels
 * @param target Target pixels
 * @param factorChannel00_128_u_8x8 ... factorChannel13_128_u_8x8 Eight factor registers
 */
1508 template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
1527 static OCEAN_FORCE_INLINE
void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const uint8x8_t& factorChannel00_128_u_8x8,
const uint8x8_t& factorChannel10_128_u_8x8,
const uint8x8_t& factorChannel01_128_u_8x8,
const uint8x8_t& factorChannel11_128_u_8x8,
const uint8x8_t& factorChannel02_128_u_8x8,
const uint8x8_t& factorChannel12_128_u_8x8,
const uint8x8_t& factorChannel03_128_u_8x8,
const uint8x8_t& factorChannel13_128_u_8x8);
1533#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON specialization of separateTo1Channel() for uint8_t source/target and 2 channels.
// De-interleaves `sourceFrame` into targetFrames[0] and targetFrames[1]: pixel n of plane c receives source
// element n * 2 + c. Sixteen pixels are handled per NEON iteration, the rest with scalar code.
// A nullptr `targetFramesPaddingElements` is treated as "no padding in any target frame".
// NOTE(review): this view of the file contains no braces, no `else` and no `template <>` line; the scoping
// described in the comments below is inferred from the two separate declarations of `blocks`/`remaining`
// and must be confirmed against the original file.
1536inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 2u>(
const uint8_t*
const sourceFrame, uint8_t*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
// Preconditions, checked through ocean_assert: valid pointers, non-empty frame, matching channel count.
1538 ocean_assert(sourceFrame !=
nullptr);
1539 ocean_assert(targetFrames !=
nullptr);
1541 ocean_assert(width != 0u && height != 0u);
1542 ocean_assert(channels == 2u);
1544 constexpr unsigned int tChannels = 2u;
// Determine whether every target frame is free of row padding.
1546 bool allTargetFramesContinuous =
true;
1548 if (targetFramesPaddingElements !=
nullptr)
1550 for (
unsigned int n = 0u; n < tChannels; ++n)
1552 if (targetFramesPaddingElements[n] != 0u)
1554 allTargetFramesContinuous =
false;
// Running read/write cursors.
1560 const uint8_t* source = sourceFrame;
1561 uint8_t* target0 = targetFrames[0];
1562 uint8_t* target1 = targetFrames[1];
// Number of pixels handled by one NEON iteration.
1564 constexpr unsigned int tBlockSize = 16u;
1566 uint8x16x2_t source_8x16x2;
// Fast path: no padding anywhere, so the whole frame is processed as one run of width * height pixels.
1568 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1570 const unsigned int pixels = width * height;
1571 const unsigned int blocks = pixels / tBlockSize;
1572 const unsigned int remaining = pixels % tBlockSize;
1574 for (
unsigned int n = 0u; n < blocks; ++n)
// vld2q_u8 reads 32 interleaved bytes and splits them into two 16-byte vectors, one per channel.
1576 source_8x16x2 = vld2q_u8(source);
1578 vst1q_u8(target0, source_8x16x2.val[0]);
1579 vst1q_u8(target1, source_8x16x2.val[1]);
1581 source += tBlockSize * tChannels;
1583 target0 += tBlockSize;
1584 target1 += tBlockSize;
// Scalar tail for the remaining (< 16) pixels.
1587 for (
unsigned int n = 0u; n < remaining; ++n)
1589 target0[n] = source[n * tChannels + 0u];
1590 target1[n] = source[n * tChannels + 1u];
// General path (presumably the `else` branch, see note at the top): rows are processed one by one so that the
// padding at the end of each row can be skipped.
1595 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
1596 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
1598 const unsigned int blocks = width / tBlockSize;
1599 const unsigned int remaining = width % tBlockSize;
1601 for (
unsigned int y = 0u; y < height; ++y)
1603 for (
unsigned int n = 0u; n < blocks; ++n)
1605 source_8x16x2 = vld2q_u8(source);
1607 vst1q_u8(target0, source_8x16x2.val[0]);
1608 vst1q_u8(target1, source_8x16x2.val[1]);
1610 source += tBlockSize * tChannels;
1612 target0 += tBlockSize;
1613 target1 += tBlockSize;
// Scalar tail of the current row.
1616 for (
unsigned int n = 0u; n < remaining; ++n)
1618 target0[n] = source[n * tChannels + 0u];
1619 target1[n] = source[n * tChannels + 1u];
// Step over the tail pixels just written plus the row padding to reach the start of the next row.
1622 source += remaining * tChannels + sourceFramePaddingElements;
1623 target0 += remaining + targetFrame0PaddingElements;
1624 target1 += remaining + targetFrame1PaddingElements;
// NEON specialization of separateTo1Channel() for uint8_t source/target and 3 channels.
// De-interleaves `sourceFrame` into targetFrames[0..2]: pixel n of plane c receives source element n * 3 + c.
// Sixteen pixels are handled per NEON iteration, the rest with scalar code.
// A nullptr `targetFramesPaddingElements` is treated as "no padding in any target frame".
// NOTE(review): this view of the file contains no braces, no `else` and no `template <>` line; the scoping
// described in the comments below is inferred from the two separate declarations of `blocks`/`remaining`
// and must be confirmed against the original file.
1630inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 3u>(
const uint8_t*
const sourceFrame, uint8_t*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
// Preconditions, checked through ocean_assert: valid pointers, non-empty frame, matching channel count.
1632 ocean_assert(sourceFrame !=
nullptr);
1633 ocean_assert(targetFrames !=
nullptr);
1635 ocean_assert(width != 0u && height != 0u);
1636 ocean_assert(channels == 3u);
1638 constexpr unsigned int tChannels = 3u;
// Determine whether every target frame is free of row padding.
1640 bool allTargetFramesContinuous =
true;
1642 if (targetFramesPaddingElements !=
nullptr)
1644 for (
unsigned int n = 0u; n < tChannels; ++n)
1646 if (targetFramesPaddingElements[n] != 0u)
1648 allTargetFramesContinuous =
false;
// Running read/write cursors.
1654 const uint8_t* source = sourceFrame;
1655 uint8_t* target0 = targetFrames[0];
1656 uint8_t* target1 = targetFrames[1];
1657 uint8_t* target2 = targetFrames[2];
// Number of pixels handled by one NEON iteration.
1659 constexpr unsigned int tBlockSize = 16u;
1661 uint8x16x3_t source_8x16x3;
// Fast path: no padding anywhere, so the whole frame is processed as one run of width * height pixels.
1663 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1665 const unsigned int pixels = width * height;
1666 const unsigned int blocks = pixels / tBlockSize;
1667 const unsigned int remaining = pixels % tBlockSize;
1669 for (
unsigned int n = 0u; n < blocks; ++n)
// vld3q_u8 reads 48 interleaved bytes and splits them into three 16-byte vectors, one per channel.
1671 source_8x16x3 = vld3q_u8(source);
1673 vst1q_u8(target0, source_8x16x3.val[0]);
1674 vst1q_u8(target1, source_8x16x3.val[1]);
1675 vst1q_u8(target2, source_8x16x3.val[2]);
1677 source += tBlockSize * tChannels;
1679 target0 += tBlockSize;
1680 target1 += tBlockSize;
1681 target2 += tBlockSize;
// Scalar tail for the remaining (< 16) pixels.
1684 for (
unsigned int n = 0u; n < remaining; ++n)
1686 target0[n] = source[n * tChannels + 0u];
1687 target1[n] = source[n * tChannels + 1u];
1688 target2[n] = source[n * tChannels + 2u];
// General path (presumably the `else` branch, see note at the top): rows are processed one by one so that the
// padding at the end of each row can be skipped.
1693 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
1694 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
1695 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[2];
1697 const unsigned int blocks = width / tBlockSize;
1698 const unsigned int remaining = width % tBlockSize;
1700 for (
unsigned int y = 0u; y < height; ++y)
1702 for (
unsigned int n = 0u; n < blocks; ++n)
1704 source_8x16x3 = vld3q_u8(source);
1706 vst1q_u8(target0, source_8x16x3.val[0]);
1707 vst1q_u8(target1, source_8x16x3.val[1]);
1708 vst1q_u8(target2, source_8x16x3.val[2]);
1710 source += tBlockSize * tChannels;
1712 target0 += tBlockSize;
1713 target1 += tBlockSize;
1714 target2 += tBlockSize;
// Scalar tail of the current row.
1717 for (
unsigned int n = 0u; n < remaining; ++n)
1719 target0[n] = source[n * tChannels + 0u];
1720 target1[n] = source[n * tChannels + 1u];
1721 target2[n] = source[n * tChannels + 2u];
// Step over the tail pixels just written plus the row padding to reach the start of the next row.
1724 source += remaining * tChannels + sourceFramePaddingElements;
1725 target0 += remaining + targetFrame0PaddingElements;
1726 target1 += remaining + targetFrame1PaddingElements;
1727 target2 += remaining + targetFrame2PaddingElements;
// NEON specialization of separateTo1Channel() for uint8_t source/target and 4 channels.
// De-interleaves `sourceFrame` into targetFrames[0..3]: pixel n of plane c receives source element n * 4 + c.
// Sixteen pixels are handled per NEON iteration, the rest with scalar code.
// A nullptr `targetFramesPaddingElements` is treated as "no padding in any target frame".
// NOTE(review): this view of the file contains no braces, no `else` and no `template <>` line; the scoping
// described in the comments below is inferred from the two separate declarations of `blocks`/`remaining`
// and must be confirmed against the original file.
1733inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 4u>(
const uint8_t*
const sourceFrame, uint8_t*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
// Preconditions, checked through ocean_assert: valid pointers, non-empty frame, matching channel count.
1735 ocean_assert(sourceFrame !=
nullptr);
1736 ocean_assert(targetFrames !=
nullptr);
1738 ocean_assert(width != 0u && height != 0u);
1739 ocean_assert(channels == 4u);
1741 constexpr unsigned int tChannels = 4u;
// Determine whether every target frame is free of row padding.
1743 bool allTargetFramesContinuous =
true;
1745 if (targetFramesPaddingElements !=
nullptr)
1747 for (
unsigned int n = 0u; n < tChannels; ++n)
1749 if (targetFramesPaddingElements[n] != 0u)
1751 allTargetFramesContinuous =
false;
// Running read/write cursors.
1757 const uint8_t* source = sourceFrame;
1758 uint8_t* target0 = targetFrames[0];
1759 uint8_t* target1 = targetFrames[1];
1760 uint8_t* target2 = targetFrames[2];
1761 uint8_t* target3 = targetFrames[3];
// Number of pixels handled by one NEON iteration.
1763 constexpr unsigned int tBlockSize = 16u;
1765 uint8x16x4_t source_8x16x4;
// Fast path: no padding anywhere, so the whole frame is processed as one run of width * height pixels.
1767 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1769 const unsigned int pixels = width * height;
1770 const unsigned int blocks = pixels / tBlockSize;
1771 const unsigned int remaining = pixels % tBlockSize;
1773 for (
unsigned int n = 0u; n < blocks; ++n)
// vld4q_u8 reads 64 interleaved bytes and splits them into four 16-byte vectors, one per channel.
1775 source_8x16x4 = vld4q_u8(source);
1777 vst1q_u8(target0, source_8x16x4.val[0]);
1778 vst1q_u8(target1, source_8x16x4.val[1]);
1779 vst1q_u8(target2, source_8x16x4.val[2]);
1780 vst1q_u8(target3, source_8x16x4.val[3]);
1782 source += tBlockSize * tChannels;
1784 target0 += tBlockSize;
1785 target1 += tBlockSize;
1786 target2 += tBlockSize;
1787 target3 += tBlockSize;
// Scalar tail for the remaining (< 16) pixels.
1790 for (
unsigned int n = 0u; n < remaining; ++n)
1792 target0[n] = source[n * tChannels + 0u];
1793 target1[n] = source[n * tChannels + 1u];
1794 target2[n] = source[n * tChannels + 2u];
1795 target3[n] = source[n * tChannels + 3u];
// General path (presumably the `else` branch, see note at the top): rows are processed one by one so that the
// padding at the end of each row can be skipped.
1800 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[0];
1801 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[1];
1802 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[2];
1803 const unsigned int targetFrame3PaddingElements = targetFramesPaddingElements ==
nullptr ? 0u : targetFramesPaddingElements[3];
1805 const unsigned int blocks = width / tBlockSize;
1806 const unsigned int remaining = width % tBlockSize;
1808 for (
unsigned int y = 0u; y < height; ++y)
1810 for (
unsigned int n = 0u; n < blocks; ++n)
1812 source_8x16x4 = vld4q_u8(source);
1814 vst1q_u8(target0, source_8x16x4.val[0]);
1815 vst1q_u8(target1, source_8x16x4.val[1]);
1816 vst1q_u8(target2, source_8x16x4.val[2]);
1817 vst1q_u8(target3, source_8x16x4.val[3]);
1819 source += tBlockSize * tChannels;
1821 target0 += tBlockSize;
1822 target1 += tBlockSize;
1823 target2 += tBlockSize;
1824 target3 += tBlockSize;
// Scalar tail of the current row.
1827 for (
unsigned int n = 0u; n < remaining; ++n)
1829 target0[n] = source[n * tChannels + 0u];
1830 target1[n] = source[n * tChannels + 1u];
1831 target2[n] = source[n * tChannels + 2u];
1832 target3[n] = source[n * tChannels + 3u];
// Step over the tail pixels just written plus the row padding to reach the start of the next row.
1835 source += remaining * tChannels + sourceFramePaddingElements;
1836 target0 += remaining + targetFrame0PaddingElements;
1837 target1 += remaining + targetFrame1PaddingElements;
1838 target2 += remaining + targetFrame2PaddingElements;
1839 target3 += remaining + targetFrame3PaddingElements;
1846template <
typename TSource,
typename TTarget,
unsigned int tChannels>
1847void FrameChannels::separateTo1Channel(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
1849 ocean_assert(sourceFrame !=
nullptr);
1850 ocean_assert(targetFrames !=
nullptr);
1852 ocean_assert(width != 0u && height != 0u);
1858 separateTo1ChannelRuntime<TSource, TTarget>(sourceFrame, targetFrames, width, height, channels, sourceFramePaddingElements, targetFramesPaddingElements);
1863 for (
unsigned int c = 0u; c < tChannels; ++c)
1865 ocean_assert(targetFrames[c] !=
nullptr);
1869 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements ==
nullptr)
1871 for (
unsigned int n = 0u; n < width * height; ++n)
1873 for (
unsigned int c = 0u; c < tChannels; ++c)
1875 targetFrames[c][n] = TTarget(sourceFrame[n * tChannels + c]);
1879 else if (targetFramesPaddingElements ==
nullptr)
1881 ocean_assert(sourceFramePaddingElements != 0u);
1883 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
1885 for (
unsigned int y = 0u; y < height; ++y)
1887 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
1889 const unsigned int targetRowOffset = y * width;
1891 for (
unsigned int x = 0u; x < width; ++x)
1893 for (
unsigned int c = 0u; c < tChannels; ++c)
1895 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * tChannels + c));
1902 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
1904 Indices32 targetFrameStrideElements(tChannels);
1906 for (
unsigned int c = 0u; c < tChannels; ++c)
1908 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
1911 for (
unsigned int y = 0u; y < height; ++y)
1913 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
1915 for (
unsigned int x = 0u; x < width; ++x)
1917 for (
unsigned int c = 0u; c < tChannels; ++c)
1919 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * tChannels + c));
1926template <
typename TSource,
typename TTarget>
1927void FrameChannels::separateTo1Channel(
const TSource*
const sourceFrame,
const std::initializer_list<TTarget*>& targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int sourceFramePaddingElements,
const std::initializer_list<const unsigned int>& targetFramesPaddingElements)
1929 ocean_assert(targetFrames.size() >= 1);
1930 ocean_assert(targetFramesPaddingElements.size() == 0 || targetFrames.size() == targetFramesPaddingElements.size());
1932 if (targetFrames.size() == 2)
1934 separateTo1Channel<TSource, TTarget, 2u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
1936 else if (targetFrames.size() == 3)
1938 separateTo1Channel<TSource, TTarget, 3u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
1940 else if (targetFrames.size() == 4)
1942 separateTo1Channel<TSource, TTarget, 4u>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
1946 separateTo1Channel<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrame, targetFrames.begin(), width, height, (
unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ?
nullptr : targetFramesPaddingElements.begin());
1950#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1953inline void FrameChannels::zipChannels<uint8_t, uint8_t, 2u>(
const uint8_t*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
1955 ocean_assert(sourceFrames !=
nullptr);
1956 ocean_assert(targetFrame !=
nullptr);
1958 ocean_assert(width != 0u && height != 0u);
1959 ocean_assert(channels == 2u);
1961 constexpr unsigned int tChannels = 2u;
1963 bool allSourceFramesContinuous =
true;
1965 if (sourceFramesPaddingElements !=
nullptr)
1967 for (
unsigned int n = 0u; n < tChannels; ++n)
1969 if (sourceFramesPaddingElements[n] != 0u)
1971 allSourceFramesContinuous =
false;
1977 const uint8_t* source0 = sourceFrames[0];
1978 const uint8_t* source1 = sourceFrames[1];
1979 uint8_t* target = targetFrame;
1981 constexpr unsigned int tBlockSize = 16u;
1983 uint8x16x2_t source_8x16x2;
1985 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1987 const unsigned int pixels = width * height;
1988 const unsigned int blocks = pixels / tBlockSize;
1989 const unsigned int remaining = pixels % tBlockSize;
1991 for (
unsigned int n = 0u; n < blocks; ++n)
1993 source_8x16x2.val[0] = vld1q_u8(source0);
1994 source_8x16x2.val[1] = vld1q_u8(source1);
1996 vst2q_u8(target, source_8x16x2);
1998 source0 += tBlockSize;
1999 source1 += tBlockSize;
2001 target += tBlockSize * tChannels;
2004 for (
unsigned int n = 0u; n < remaining; ++n)
2006 target[n * tChannels + 0u] = source0[n];
2007 target[n * tChannels + 1u] = source1[n];
2012 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2013 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2015 const unsigned int blocks = width / tBlockSize;
2016 const unsigned int remaining = width % tBlockSize;
2018 for (
unsigned int y = 0u; y < height; ++y)
2020 for (
unsigned int n = 0u; n < blocks; ++n)
2022 source_8x16x2.val[0] = vld1q_u8(source0);
2023 source_8x16x2.val[1] = vld1q_u8(source1);
2025 vst2q_u8(target, source_8x16x2);
2027 source0 += tBlockSize;
2028 source1 += tBlockSize;
2030 target += tBlockSize * tChannels;
2033 for (
unsigned int n = 0u; n < remaining; ++n)
2035 target[n * tChannels + 0u] = source0[n];
2036 target[n * tChannels + 1u] = source1[n];
2039 source0 += remaining + sourceFrame0PaddingElements;
2040 source1 += remaining + sourceFrame1PaddingElements;
2041 target += remaining * tChannels + targetFramePaddingElements;
2047inline void FrameChannels::zipChannels<uint8_t, uint8_t, 3u>(
const uint8_t*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2049 ocean_assert(sourceFrames !=
nullptr);
2050 ocean_assert(targetFrame !=
nullptr);
2052 ocean_assert(width != 0u && height != 0u);
2053 ocean_assert(channels == 3u);
2055 constexpr unsigned int tChannels = 3u;
2057 bool allSourceFramesContinuous =
true;
2059 if (sourceFramesPaddingElements !=
nullptr)
2061 for (
unsigned int n = 0u; n < tChannels; ++n)
2063 if (sourceFramesPaddingElements[n] != 0u)
2065 allSourceFramesContinuous =
false;
2071 const uint8_t* source0 = sourceFrames[0];
2072 const uint8_t* source1 = sourceFrames[1];
2073 const uint8_t* source2 = sourceFrames[2];
2074 uint8_t* target = targetFrame;
2076 constexpr unsigned int tBlockSize = 16u;
2078 uint8x16x3_t source_8x16x3;
2080 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2082 const unsigned int pixels = width * height;
2083 const unsigned int blocks = pixels / tBlockSize;
2084 const unsigned int remaining = pixels % tBlockSize;
2086 for (
unsigned int n = 0u; n < blocks; ++n)
2088 source_8x16x3.val[0] = vld1q_u8(source0);
2089 source_8x16x3.val[1] = vld1q_u8(source1);
2090 source_8x16x3.val[2] = vld1q_u8(source2);
2092 vst3q_u8(target, source_8x16x3);
2094 source0 += tBlockSize;
2095 source1 += tBlockSize;
2096 source2 += tBlockSize;
2098 target += tBlockSize * tChannels;
2101 for (
unsigned int n = 0u; n < remaining; ++n)
2103 target[n * tChannels + 0u] = source0[n];
2104 target[n * tChannels + 1u] = source1[n];
2105 target[n * tChannels + 2u] = source2[n];
2110 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2111 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2112 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
2114 const unsigned int blocks = width / tBlockSize;
2115 const unsigned int remaining = width % tBlockSize;
2117 for (
unsigned int y = 0u; y < height; ++y)
2119 for (
unsigned int n = 0u; n < blocks; ++n)
2121 source_8x16x3.val[0] = vld1q_u8(source0);
2122 source_8x16x3.val[1] = vld1q_u8(source1);
2123 source_8x16x3.val[2] = vld1q_u8(source2);
2125 vst3q_u8(target, source_8x16x3);
2127 source0 += tBlockSize;
2128 source1 += tBlockSize;
2129 source2 += tBlockSize;
2131 target += tBlockSize * tChannels;
2134 for (
unsigned int n = 0u; n < remaining; ++n)
2136 target[n * tChannels + 0u] = source0[n];
2137 target[n * tChannels + 1u] = source1[n];
2138 target[n * tChannels + 2u] = source2[n];
2141 source0 += remaining + sourceFrame0PaddingElements;
2142 source1 += remaining + sourceFrame1PaddingElements;
2143 source2 += remaining + sourceFrame2PaddingElements;
2144 target += remaining * tChannels + targetFramePaddingElements;
2150inline void FrameChannels::zipChannels<uint8_t, uint8_t, 4u>(
const uint8_t*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2152 ocean_assert(sourceFrames !=
nullptr);
2153 ocean_assert(targetFrame !=
nullptr);
2155 ocean_assert(width != 0u && height != 0u);
2156 ocean_assert(channels == 4u);
2158 constexpr unsigned int tChannels = 4u;
2160 bool allSourceFramesContinuous =
true;
2162 if (sourceFramesPaddingElements !=
nullptr)
2164 for (
unsigned int n = 0u; n < tChannels; ++n)
2166 if (sourceFramesPaddingElements[n] != 0u)
2168 allSourceFramesContinuous =
false;
2174 const uint8_t* source0 = sourceFrames[0];
2175 const uint8_t* source1 = sourceFrames[1];
2176 const uint8_t* source2 = sourceFrames[2];
2177 const uint8_t* source3 = sourceFrames[3];
2178 uint8_t* target = targetFrame;
2180 constexpr unsigned int tBlockSize = 16u;
2182 uint8x16x4_t source_8x16x4;
2184 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2186 const unsigned int pixels = width * height;
2187 const unsigned int blocks = pixels / tBlockSize;
2188 const unsigned int remaining = pixels % tBlockSize;
2190 for (
unsigned int n = 0u; n < blocks; ++n)
2192 source_8x16x4.val[0] = vld1q_u8(source0);
2193 source_8x16x4.val[1] = vld1q_u8(source1);
2194 source_8x16x4.val[2] = vld1q_u8(source2);
2195 source_8x16x4.val[3] = vld1q_u8(source3);
2197 vst4q_u8(target, source_8x16x4);
2199 source0 += tBlockSize;
2200 source1 += tBlockSize;
2201 source2 += tBlockSize;
2202 source3 += tBlockSize;
2204 target += tBlockSize * tChannels;
2207 for (
unsigned int n = 0u; n < remaining; ++n)
2209 target[n * tChannels + 0u] = source0[n];
2210 target[n * tChannels + 1u] = source1[n];
2211 target[n * tChannels + 2u] = source2[n];
2212 target[n * tChannels + 3u] = source3[n];
2217 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2218 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2219 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
2220 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[3];
2222 const unsigned int blocks = width / tBlockSize;
2223 const unsigned int remaining = width % tBlockSize;
2225 for (
unsigned int y = 0u; y < height; ++y)
2227 for (
unsigned int n = 0u; n < blocks; ++n)
2229 source_8x16x4.val[0] = vld1q_u8(source0);
2230 source_8x16x4.val[1] = vld1q_u8(source1);
2231 source_8x16x4.val[2] = vld1q_u8(source2);
2232 source_8x16x4.val[3] = vld1q_u8(source3);
2234 vst4q_u8(target, source_8x16x4);
2236 source0 += tBlockSize;
2237 source1 += tBlockSize;
2238 source2 += tBlockSize;
2239 source3 += tBlockSize;
2241 target += tBlockSize * tChannels;
2244 for (
unsigned int n = 0u; n < remaining; ++n)
2246 target[n * tChannels + 0u] = source0[n];
2247 target[n * tChannels + 1u] = source1[n];
2248 target[n * tChannels + 2u] = source2[n];
2249 target[n * tChannels + 3u] = source3[n];
2252 source0 += remaining + sourceFrame0PaddingElements;
2253 source1 += remaining + sourceFrame1PaddingElements;
2254 source2 += remaining + sourceFrame2PaddingElements;
2255 source3 += remaining + sourceFrame3PaddingElements;
2256 target += remaining * tChannels + targetFramePaddingElements;
// Specialization of zipChannels() interleaving two float planes into one 2-channel uint8_t frame.
// Full groups of 16 pixels are written with vst2q_u8; leftover pixels are converted one by one with uint8_t(value),
// and debug builds assert that each leftover source value lies in [0, 256).
// sourceFramesPaddingElements may be nullptr, in which case every source plane is treated as having no padding.
// NOTE(review): the statements that fill target_8x16x2 before each vst2q_u8 are not visible in this excerpt.
2262inline void FrameChannels::zipChannels<float, uint8_t, 2u>(
const float*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2264 ocean_assert(sourceFrames !=
nullptr);
2265 ocean_assert(targetFrame !=
nullptr);
2267 ocean_assert(width != 0u && height != 0u);
2268 ocean_assert(channels == 2u);
2270 constexpr unsigned int tChannels = 2u;
// determining whether any source plane has row padding
2272 bool allSourceFramesContinuous =
true;
2274 if (sourceFramesPaddingElements !=
nullptr)
2276 for (
unsigned int n = 0u; n < tChannels; ++n)
2278 if (sourceFramesPaddingElements[n] != 0u)
2280 allSourceFramesContinuous =
false;
2286 const float* source0 = sourceFrames[0];
2287 const float* source1 = sourceFrames[1];
2288 uint8_t* target = targetFrame;
// number of pixels handled by one vector store
2290 constexpr unsigned int tBlockSize = 16u;
2292 uint8x16x2_t target_8x16x2;
// no padding anywhere: the whole frame is processed as one run of width * height pixels
2294 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2296 const unsigned int pixels = width * height;
2297 const unsigned int blocks = pixels / tBlockSize;
2298 const unsigned int remaining = pixels % tBlockSize;
2300 for (
unsigned int n = 0u; n < blocks; ++n)
2305 vst2q_u8(target, target_8x16x2);
2307 source0 += tBlockSize;
2308 source1 += tBlockSize;
2310 target += tBlockSize * tChannels;
// scalar handling of the pixels which do not fill a complete block
2313 for (
unsigned int n = 0u; n < remaining; ++n)
2315 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2316 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2318 target[n * tChannels + 0u] = uint8_t(source0[n]);
2319 target[n * tChannels + 1u] = uint8_t(source1[n]);
// row-by-row processing with per-plane padding (0 for every plane if no padding array is given)
2324 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2325 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2327 const unsigned int blocks = width / tBlockSize;
2328 const unsigned int remaining = width % tBlockSize;
2330 for (
unsigned int y = 0u; y < height; ++y)
2332 for (
unsigned int n = 0u; n < blocks; ++n)
2337 vst2q_u8(target, target_8x16x2);
2339 source0 += tBlockSize;
2340 source1 += tBlockSize;
2342 target += tBlockSize * tChannels;
// scalar handling of the row's leftover pixels
2345 for (
unsigned int n = 0u; n < remaining; ++n)
2347 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2348 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2350 target[n * tChannels + 0u] = uint8_t(source0[n]);
2351 target[n * tChannels + 1u] = uint8_t(source1[n]);
// advancing past the leftover pixels and the row padding
2354 source0 += remaining + sourceFrame0PaddingElements;
2355 source1 += remaining + sourceFrame1PaddingElements;
2356 target += remaining * tChannels + targetFramePaddingElements;
// Specialization of zipChannels() interleaving three float planes into one 3-channel uint8_t frame.
// Full groups of 16 pixels are written with vst3q_u8; leftover pixels are converted one by one with uint8_t(value),
// and debug builds assert that each leftover source value lies in [0, 256).
// sourceFramesPaddingElements may be nullptr, in which case every source plane is treated as having no padding.
// NOTE(review): the statements that fill target_8x16x3 before each vst3q_u8 are not visible in this excerpt.
2362inline void FrameChannels::zipChannels<float, uint8_t, 3u>(
const float*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2364 ocean_assert(sourceFrames !=
nullptr);
2365 ocean_assert(targetFrame !=
nullptr);
2367 ocean_assert(width != 0u && height != 0u);
2368 ocean_assert(channels == 3u);
2370 constexpr unsigned int tChannels = 3u;
// determining whether any source plane has row padding
2372 bool allSourceFramesContinuous =
true;
2374 if (sourceFramesPaddingElements !=
nullptr)
2376 for (
unsigned int n = 0u; n < tChannels; ++n)
2378 if (sourceFramesPaddingElements[n] != 0u)
2380 allSourceFramesContinuous =
false;
2386 const float* source0 = sourceFrames[0];
2387 const float* source1 = sourceFrames[1];
2388 const float* source2 = sourceFrames[2];
2389 uint8_t* target = targetFrame;
// number of pixels handled by one vector store
2391 constexpr unsigned int tBlockSize = 16u;
2393 uint8x16x3_t target_8x16x3;
// no padding anywhere: the whole frame is processed as one run of width * height pixels
2395 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2397 const unsigned int pixels = width * height;
2398 const unsigned int blocks = pixels / tBlockSize;
2399 const unsigned int remaining = pixels % tBlockSize;
2401 for (
unsigned int n = 0u; n < blocks; ++n)
2407 vst3q_u8(target, target_8x16x3);
2409 source0 += tBlockSize;
2410 source1 += tBlockSize;
2411 source2 += tBlockSize;
2413 target += tBlockSize * tChannels;
// scalar handling of the pixels which do not fill a complete block
2416 for (
unsigned int n = 0u; n < remaining; ++n)
2418 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2419 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2420 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2422 target[n * tChannels + 0u] = uint8_t(source0[n]);
2423 target[n * tChannels + 1u] = uint8_t(source1[n]);
2424 target[n * tChannels + 2u] = uint8_t(source2[n]);
// row-by-row processing with per-plane padding (0 for every plane if no padding array is given)
2429 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2430 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2431 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
2433 const unsigned int blocks = width / tBlockSize;
2434 const unsigned int remaining = width % tBlockSize;
2436 for (
unsigned int y = 0u; y < height; ++y)
2438 for (
unsigned int n = 0u; n < blocks; ++n)
2445 vst3q_u8(target, target_8x16x3);
2447 source0 += tBlockSize;
2448 source1 += tBlockSize;
2449 source2 += tBlockSize;
2451 target += tBlockSize * tChannels;
// scalar handling of the row's leftover pixels
2454 for (
unsigned int n = 0u; n < remaining; ++n)
2456 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2457 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2458 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2460 target[n * tChannels + 0u] = uint8_t(source0[n]);
2461 target[n * tChannels + 1u] = uint8_t(source1[n]);
2462 target[n * tChannels + 2u] = uint8_t(source2[n]);
// advancing past the leftover pixels and the row padding
2465 source0 += remaining + sourceFrame0PaddingElements;
2466 source1 += remaining + sourceFrame1PaddingElements;
2467 source2 += remaining + sourceFrame2PaddingElements;
2468 target += remaining * tChannels + targetFramePaddingElements;
// Specialization of zipChannels() interleaving four float planes into one 4-channel uint8_t frame.
// Full groups of 16 pixels are written with vst4q_u8; leftover pixels are converted one by one with uint8_t(value),
// and debug builds assert that each leftover source value lies in [0, 256).
// sourceFramesPaddingElements may be nullptr, in which case every source plane is treated as having no padding.
// NOTE(review): the statements that fill target_8x16x4 before each vst4q_u8 are not visible in this excerpt.
2474inline void FrameChannels::zipChannels<float, uint8_t, 4u>(
const float*
const* sourceFrames, uint8_t*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2476 ocean_assert(sourceFrames !=
nullptr);
2477 ocean_assert(targetFrame !=
nullptr);
2479 ocean_assert(width != 0u && height != 0u);
2480 ocean_assert(channels == 4u);
2482 constexpr unsigned int tChannels = 4u;
// determining whether any source plane has row padding
2484 bool allSourceFramesContinuous =
true;
2486 if (sourceFramesPaddingElements !=
nullptr)
2488 for (
unsigned int n = 0u; n < tChannels; ++n)
2490 if (sourceFramesPaddingElements[n] != 0u)
2492 allSourceFramesContinuous =
false;
2498 const float* source0 = sourceFrames[0];
2499 const float* source1 = sourceFrames[1];
2500 const float* source2 = sourceFrames[2];
2501 const float* source3 = sourceFrames[3];
2502 uint8_t* target = targetFrame;
// number of pixels handled by one vector store
2504 constexpr unsigned int tBlockSize = 16u;
2506 uint8x16x4_t target_8x16x4;
// no padding anywhere: the whole frame is processed as one run of width * height pixels
2508 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2510 const unsigned int pixels = width * height;
2511 const unsigned int blocks = pixels / tBlockSize;
2512 const unsigned int remaining = pixels % tBlockSize;
2514 for (
unsigned int n = 0u; n < blocks; ++n)
2521 vst4q_u8(target, target_8x16x4);
2523 source0 += tBlockSize;
2524 source1 += tBlockSize;
2525 source2 += tBlockSize;
2526 source3 += tBlockSize;
2528 target += tBlockSize * tChannels;
// scalar handling of the pixels which do not fill a complete block
2531 for (
unsigned int n = 0u; n < remaining; ++n)
2533 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2534 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2535 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2536 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2538 target[n * tChannels + 0u] = uint8_t(source0[n]);
2539 target[n * tChannels + 1u] = uint8_t(source1[n]);
2540 target[n * tChannels + 2u] = uint8_t(source2[n]);
2541 target[n * tChannels + 3u] = uint8_t(source3[n]);
// row-by-row processing with per-plane padding (0 for every plane if no padding array is given)
2546 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[0];
2547 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[1];
2548 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[2];
2549 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements ==
nullptr ? 0u : sourceFramesPaddingElements[3];
2551 const unsigned int blocks = width / tBlockSize;
2552 const unsigned int remaining = width % tBlockSize;
2554 for (
unsigned int y = 0u; y < height; ++y)
2556 for (
unsigned int n = 0u; n < blocks; ++n)
2563 vst4q_u8(target, target_8x16x4);
2565 source0 += tBlockSize;
2566 source1 += tBlockSize;
2567 source2 += tBlockSize;
2568 source3 += tBlockSize;
2570 target += tBlockSize * tChannels;
// scalar handling of the row's leftover pixels
2573 for (
unsigned int n = 0u; n < remaining; ++n)
2575 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2576 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2577 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2578 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2580 target[n * tChannels + 0u] = uint8_t(source0[n]);
2581 target[n * tChannels + 1u] = uint8_t(source1[n]);
2582 target[n * tChannels + 2u] = uint8_t(source2[n]);
2583 target[n * tChannels + 3u] = uint8_t(source3[n]);
// advancing past the leftover pixels and the row padding
2586 source0 += remaining + sourceFrame0PaddingElements;
2587 source1 += remaining + sourceFrame1PaddingElements;
2588 source2 += remaining + sourceFrame2PaddingElements;
2589 source3 += remaining + sourceFrame3PaddingElements;
2590 target += remaining * tChannels + targetFramePaddingElements;
2597template <
typename TSource,
typename TTarget,
unsigned int tChannels>
2598void FrameChannels::zipChannels(
const TSource*
const* sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
2600 ocean_assert(sourceFrames !=
nullptr);
2601 ocean_assert(targetFrame !=
nullptr);
2603 ocean_assert(width != 0u && height != 0u);
2609 zipChannelsRuntime<TSource, TTarget>(sourceFrames, targetFrame, width, height, channels, sourceFramesPaddingElements, targetFramePaddingElements);
2613 bool allSourceFramesContinuous =
true;
2615 if (sourceFramesPaddingElements !=
nullptr)
2617 for (
unsigned int n = 0u; n < tChannels; ++n)
2619 if (sourceFramesPaddingElements[n] != 0u)
2621 allSourceFramesContinuous =
false;
2627 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2629 for (
unsigned int n = 0u; n < width * height; ++n)
2631 for (
unsigned int c = 0u; c < tChannels; ++c)
2633 targetFrame[n * tChannels + c] = TTarget(sourceFrames[c][n]);
2639 const unsigned int targetFrameStrideElements = width * tChannels + targetFramePaddingElements;
2641 Indices32 sourceFrameStrideElements(tChannels);
2643 for (
unsigned int c = 0u; c < tChannels; ++c)
2645 if (sourceFramesPaddingElements ==
nullptr)
2647 sourceFrameStrideElements[c] = width;
2651 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
2655 for (
unsigned int y = 0u; y < height; ++y)
2657 TTarget*
const targetRow = targetFrame + y * targetFrameStrideElements;
2659 for (
unsigned int x = 0u; x < width; ++x)
2661 for (
unsigned int c = 0u; c < tChannels; ++c)
2663 *(targetRow + x * tChannels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
2670template <
typename TSource,
typename TTarget>
2671void FrameChannels::zipChannels(
const std::initializer_list<const TSource*>& sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const std::initializer_list<unsigned int>& sourceFramePaddingElements,
const unsigned int targetFramePaddingElements)
2673 ocean_assert(sourceFrames.size() >= 1);
2674 ocean_assert(sourceFramePaddingElements.size() == 0 || sourceFrames.size() == sourceFramePaddingElements.size());
2676 if (sourceFrames.size() == 2)
2678 zipChannels<TSource, TTarget, 2u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2680 else if (sourceFrames.size() == 3)
2682 zipChannels<TSource, TTarget, 3u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2684 else if (sourceFrames.size() == 4)
2686 zipChannels<TSource, TTarget, 4u>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2690 zipChannels<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrames.begin(), targetFrame, width, height, (
unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ?
nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2694template <
typename T,
unsigned int tSourceChannels>
2695inline void FrameChannels::addFirstChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2697 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
2699 ocean_assert(source !=
nullptr && sourceNewChannel !=
nullptr && target !=
nullptr);
2700 ocean_assert(source != target);
2701 ocean_assert(width >= 1u && height >= 1u);
2703 const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
2705 const void* sources[2] = {source, sourceNewChannel};
2707 FrameConverter::convertArbitraryPixelFormat(sources, (
void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, true>, options, worker);
// Adds one channel with a constant value to a frame by delegating to FrameConverter::convertGenericPixelFormat()
// with the row function addChannelValueRow<T, tSourceChannels, true>.
// NOTE(review): the function signature line is not visible in this excerpt; the body mirrors addLastChannelValue()
// except for the `true` template flag, so this is presumably addFirstChannelValue() - confirm.
2710template <
typename T,
unsigned int tSourceChannels>
2713 static_assert(tSourceChannels >= 1u,
"Invalid channel number!");
2715 ocean_assert(source !=
nullptr && target !=
nullptr);
2716 ocean_assert(width >= 1u && height >= 1u);
// the target holds one channel more than the source
2718 const unsigned int targetChannels = tSourceChannels + 1u;
// row strides in elements: pixels times channels plus the row padding
2720 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2721 const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
// the channel value is handed to the row function through an untyped pointer
2723 const void* channelValueParameter = (
const void*)(&newChannelValue);
// true if neither the source nor the target has row padding
2725 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2727 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, true>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
2730template <
typename T,
unsigned int tSourceChannels>
2731inline void FrameChannels::addLastChannel(
const T* source,
const T* sourceNewChannel, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int sourceNewChannelPaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2733 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
2735 ocean_assert(source !=
nullptr && sourceNewChannel !=
nullptr && target !=
nullptr);
2736 ocean_assert(source != target);
2737 ocean_assert(width >= 1u && height >= 1u);
2739 const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
2741 const void* sources[2] = {source, sourceNewChannel};
2743 FrameConverter::convertArbitraryPixelFormat(sources, (
void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, false>, options, worker);
// FrameChannels::addLastChannelValue
// Forwards a source frame plus one constant channel value to FrameConverter::convertGenericPixelFormat<T>(),
// using addChannelValueRow<T, tSourceChannels, false> as row function; the target has tSourceChannels + 1 channels.
// NOTE(review): this listing carries fused line-number prefixes and shows no braces; the notes cover the visible statements only.
// Template parameters:
//   T                element type of the buffers and of the new channel value
//   tSourceChannels  channel count of the source frame, must be >= 1
// Parameters:
//   source                 input buffer, must be valid
//   newChannelValue        value handed (by address) to the row function
//   target                 output buffer, must be valid
//   width, height          frame dimensions, each must be >= 1
//   conversionFlag         handed through unchanged to the converter
//   sourcePaddingElements, targetPaddingElements
//                          padding added to the row strides computed below
//   worker                 handed through unchanged to the converter
2746template <
typename T,
unsigned int tSourceChannels>
2747inline void FrameChannels::addLastChannelValue(
const T* source,
const T newChannelValue, T* target,
const unsigned int width,
const unsigned int height,
const ConversionFlag conversionFlag,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2749 static_assert(tSourceChannels >= 1u,
"Invalid channel number!");
2751 ocean_assert(source !=
nullptr && target !=
nullptr);
2752 ocean_assert(width >= 1u && height >= 1u);
// The target carries exactly one channel more than the source.
2754 const unsigned int targetChannels = tSourceChannels + 1u;
// Row strides in elements: payload (width * channels) plus the padding at the end of each row.
2756 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2757 const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
// The value is passed type-erased; the pointer refers to the by-value parameter and is therefore only valid during this call.
2759 const void* channelValueParameter = (
const void*)(&newChannelValue);
// Both buffers are gap-free only if neither of them has padding.
2761 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// presumably the `false` template argument selects appending (rather than prepending) the value - confirm against addChannelValueRow
2763 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, false>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
// NOTE(review): the signature line of this definition is missing from the listing; judging by the shuffle
// pattern below this is presumably FrameChannels::removeFirstChannel - confirm.
// The visible statements build a shuffle pattern and delegate to shuffleChannels() with one channel less in the target.
// The names source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements and worker
// are used below and therefore have to be parameters of the missing signature.
2766template <
typename T,
unsigned int tSourceChannels>
// At least two source channels are needed so that one remains; at most eight nibbles fit into the 32-bit pattern.
2769 static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u,
"Invalid channel number!");
2771 ocean_assert(source !=
nullptr && target !=
nullptr);
2772 ocean_assert(width >= 1u && height >= 1u);
// Nibble i (counted from the least significant nibble) holds the value i + 1.
2774 const unsigned int shufflePatternMax = 0x07654321u;
// Keeps the lowest (tSourceChannels - 1) nibbles, e.g. 0xFF for tSourceChannels == 3 and 0xFFF for tSourceChannels == 4.
2775 const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u);
// e.g. 0x21 for tSourceChannels == 3; presumably nibble i selects the source channel for target channel i,
// so target channels 0, 1 read source channels 1, 2 - confirm against shuffleChannels
2777 const unsigned int shufflePattern = shufflePatternMax & mask;
2779 FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
// NOTE(review): the signature line of this definition is missing from the listing; judging by the shuffle
// pattern below this is presumably FrameChannels::removeLastChannel - confirm.
// The visible statements build a shuffle pattern and delegate to shuffleChannels() with one channel less in the target.
// The names source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements and worker
// are used below and therefore have to be parameters of the missing signature.
2782template <
typename T,
unsigned int tSourceChannels>
// At least two source channels are needed so that one remains; at most eight nibbles fit into the 32-bit pattern.
2785 static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u,
"Invalid channel number!");
2787 ocean_assert(source !=
nullptr && target !=
nullptr);
2788 ocean_assert(width >= 1u && height >= 1u);
// Identity pattern: nibble i (counted from the least significant nibble) holds the value i.
2790 const unsigned int shufflePatternMax = 0x76543210u;
// Keeps the lowest (tSourceChannels - 1) nibbles, e.g. 0xFF for tSourceChannels == 3 and 0xFFF for tSourceChannels == 4.
2791 const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u);
// e.g. 0x10 for tSourceChannels == 3; presumably nibble i selects the source channel for target channel i,
// so target channels 0, 1 read source channels 0, 1 - confirm against shuffleChannels
2793 const unsigned int shufflePattern = shufflePatternMax & mask;
2795 FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
// FrameChannels::copyChannel
// Forwards source and target frame to FrameConverter::convertGenericPixelFormat<T>() with the row function
// copyChannelRow<T, tSourceChannels, tTargetChannels, tSourceChannelIndex, tTargetChannelIndex>.
// NOTE(review): this listing carries fused line-number prefixes and shows no braces; the notes cover the visible statements only.
// Template parameters:
//   T                    element type of both buffers
//   tSourceChannels      channel count of the source frame, must be >= 1
//   tTargetChannels      channel count of the target frame, must be >= 1
//   tSourceChannelIndex  channel index within the source, must be < tSourceChannels
//   tTargetChannelIndex  channel index within the target, must be < tTargetChannels
// Parameters:
//   source, target       input and output buffer, both must be valid
//   width, height        frame dimensions, each must be >= 1
//   sourcePaddingElements, targetPaddingElements
//                        padding added to the row strides computed below
//   worker               handed through unchanged to the converter
2798template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
2799inline void FrameChannels::copyChannel(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
2801 static_assert(tSourceChannels >= 1u,
"Invalid number of channels!");
2802 static_assert(tTargetChannels >= 1u,
"Invalid number of channels!");
2804 static_assert(tSourceChannelIndex < tSourceChannels,
"Invalid channel index!");
2805 static_assert(tTargetChannelIndex < tTargetChannels,
"Invalid channel index!");
2807 ocean_assert(source !=
nullptr && target !=
nullptr);
2808 ocean_assert(width >= 1u && height >= 1u);
// Row strides in elements: payload (width * channels) plus the padding at the end of each row.
2810 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2811 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
// Both buffers are gap-free only if neither of them has padding.
2815 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// The function has no conversion-flag parameter; the conversion is always requested with CONVERT_NORMAL.
// NOTE(review): reversePixelOrderRowInPlaceFunction is not declared in the visible lines (original lines 2812-2814 are missing) - confirm its definition.
2817 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements,
CONVERT_NORMAL, FrameChannels::copyChannelRow<T, tSourceChannels, tTargetChannels, tSourceChannelIndex, tTargetChannelIndex>, reversePixelOrderRowInPlaceFunction, areContinuous,
nullptr, worker);
// FrameChannels::setChannel
// Hands a frame, a channel value and the row range [0, height) to setChannelSubset<T, tChannel, tChannels>(),
// either through a worker or with a direct call.
// NOTE(review): this listing carries fused line-number prefixes and shows no braces; the notes cover the visible statements only.
// Template parameters:
//   T          element type of the frame and of the value
//   tChannel   index of the channel addressed, must be < tChannels
//   tChannels  channel count of the frame, must be >= 1
// Parameters:
//   frame                 buffer to be modified, must be valid
//   width, height         frame dimensions, each must be >= 1
//   value                 value handed to setChannelSubset
//   framePaddingElements  padding value handed to setChannelSubset
//   worker                used for the distributed call below
2820template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
2821inline void FrameChannels::setChannel(T* frame,
const unsigned int width,
const unsigned int height,
const T value,
const unsigned int framePaddingElements,
Worker* worker)
2823 static_assert(tChannels >= 1u,
"Invalid channel number!");
2824 static_assert(tChannel < tChannels,
"Invalid channel index!");
2826 ocean_assert(frame !=
nullptr);
2827 ocean_assert(width >= 1u && height >= 1u);
// NOTE(review): the condition selecting between the two calls below is not visible in this listing;
// presumably `if (worker)` guards the first call and the second one is the else branch - confirm.
// Distributed execution over the rows [0, height); the two trailing 0u arguments of createStatic are presumably
// placeholders for the first row / number of rows which the worker fills in per thread - confirm against Worker.
2831 worker->
executeFunction(
Worker::Function::createStatic(&setChannelSubset<T, tChannel, tChannels>, frame, width, value, framePaddingElements, 0u, 0u), 0u, height);
// Direct call covering all rows: first row 0, number of rows height.
2835 setChannelSubset<T, tChannel, tChannels>(frame, width, value, framePaddingElements, 0u, height);
// NOTE(review): the signature line of this definition is missing from the listing; judging by the row function
// used below this is presumably FrameChannels::reverseChannelOrder - confirm.
// The visible statements forward source and target frame to FrameConverter::convertGenericPixelFormat<T>() with
// reverseRowChannelOrder<T, tChannels> as row function; source and target share the same channel count.
// The names source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements and worker
// are used below and therefore have to be parameters of the missing signature.
2839template <
typename T,
unsigned int tChannels>
2842 static_assert(tChannels >= 1u,
"Invalid channel number!");
2844 ocean_assert(source !=
nullptr && target !=
nullptr);
2845 ocean_assert(width >= 1u && height >= 1u);
// Row strides in elements: payload (width * channels) plus the padding at the end of each row.
2847 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
2848 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
// Hard-coded to false regardless of the actual padding values.
// NOTE(review): presumably this forces row-by-row processing even for gap-free buffers - confirm the intent against convertGenericPixelFormat.
2850 constexpr bool areContinuous =
false;
2852 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::reverseRowChannelOrder<T, tChannels>, FrameChannels::reverseRowPixelOrderInPlace<T, tChannels>, areContinuous,
nullptr, worker);
// NOTE(review): the signature line of this definition is missing from the listing; presumably
// FrameChannels::reverseRowPixelOrder(source, target, size) - confirm.
// The visible statements copy `size` pixels with tChannels channels each from source to target in reversed pixel
// order while keeping the channel order inside each pixel: source is walked forwards, target backwards from its end.
// NOTE(review): braces, the channel dispatch (switch/case or similar) and closing preprocessor lines are not visible;
// the mapping of the four NEON loops to channel counts below is derived from the number of bytes each loop handles.
2855template <
typename T,
unsigned int tChannels>
2858 static_assert(tChannels >= 1u,
"Invalid channel number!");
2860 ocean_assert(source !=
nullptr && target !=
nullptr);
2861 ocean_assert(size >= 1);
// Range markers of both buffers; in the visible code they are used inside ocean_assert only.
2864 const T*
const debugSourceStart = source;
2865 const T*
const debugSourceEnd = debugSourceStart + size * tChannels;
2867 const T*
const debugTargetStart = target;
2868 const T*
const debugTargetEnd = debugTargetStart + size * tChannels;
// From here on target points one element behind the end of the target row and is decremented before each write.
2872 target += size * tChannels;
2874 const T*
const sourceEnd = source + size * tChannels;
2876#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON path, only for element types mapping to uint8_t; blocks of 16 pixels, the remainder is left to the scalar loop.
2878 if constexpr (std::is_same<typename TypeMapper<T>::Type, uint8_t>::value)
2880 const size_t blocks16 = size /
size_t(16);
// 16 bytes per block (presumably the tChannels == 1 branch): reverse the bytes of each 64-bit half, then swap the halves.
2886 for (
size_t n = 0; n < blocks16; ++n)
2888 target -= 16u * tChannels;
2890 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2891 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2893 const uint8x16_t source_u_8x16 = vld1q_u8((
const uint8_t*)(source));
2894 uint8x16x16_t_placeholder_removed
3016template <
typename T,
unsigned int tChannels>
3019 static_assert(tChannels >= 1u,
"Invalid channel number!");
3021 ocean_assert(data !=
nullptr);
3022 ocean_assert(size >= 1);
3028#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3030 if constexpr (std::is_same<typename TypeMapper<T>::Type, uint8_t>::value)
3034 const size_t blocks32 = size /
size_t(32);
3036 uint8_t* left = (uint8_t*)(data);
3037 uint8_t* right = (uint8_t*)(data) + (size - 16u) * tChannels;
3043 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3045 const uint8x16_t left_u_8x16 = vld1q_u8(left);
3046 const uint8x16_t right_u_8x16 = vld1q_u8(right);
3048 uint8x16_t revLeft_u_8x16 = vrev64q_u8(left_u_8x16);
3049 revLeft_u_8x16 = vcombine_u8(vget_high_u8(revLeft_u_8x16), vget_low_u8(revLeft_u_8x16));
3051 uint8x16_t revRight_u_8x16 = vrev64q_u8(right_u_8x16);
3052 revRight_u_8x16 = vcombine_u8(vget_high_u8(revRight_u_8x16), vget_low_u8(revRight_u_8x16));
3054 vst1q_u8(left, revRight_u_8x16);
3055 vst1q_u8(right, revLeft_u_8x16);
3057 left += 16u * tChannels;
3058 right -= 16u * tChannels;
3061 n += blocks32 * 16u;
3068 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3070 const uint8x16x2_t left_u_8x16x2 = vld2q_u8(left);
3071 const uint8x16x2_t right_u_8x16x2 = vld2q_u8(right);
3073 uint8x16x2_t revLeft_u_8x16x2;
3074 revLeft_u_8x16x2.val[0] = vrev64q_u8(left_u_8x16x2.val[0]);
3075 revLeft_u_8x16x2.val[1] = vrev64q_u8(left_u_8x16x2.val[1]);
3076 revLeft_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[0]), vget_low_u8(revLeft_u_8x16x2.val[0]));
3077 revLeft_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[1]), vget_low_u8(revLeft_u_8x16x2.val[1]));
3079 uint8x16x2_t revRight_u_8x16x2;
3080 revRight_u_8x16x2.val[0] = vrev64q_u8(right_u_8x16x2.val[0]);
3081 revRight_u_8x16x2.val[1] = vrev64q_u8(right_u_8x16x2.val[1]);
3082 revRight_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[0]), vget_low_u8(revRight_u_8x16x2.val[0]));
3083 revRight_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[1]), vget_low_u8(revRight_u_8x16x2.val[1]));
3085 vst2q_u8(left, revRight_u_8x16x2);
3086 vst2q_u8(right, revLeft_u_8x16x2);
3088 left += 16u * tChannels;
3089 right -= 16u * tChannels;
3092 n += blocks32 * 16u;
3099 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3101 const uint8x16x3_t left_u_8x16x3 = vld3q_u8(left);
3102 const uint8x16x3_t right_u_8x16x3 = vld3q_u8(right);
3104 uint8x16x3_t revLeft_u_8x16x3;
3105 revLeft_u_8x16x3.val[0] = vrev64q_u8(left_u_8x16x3.val[0]);
3106 revLeft_u_8x16x3.val[1] = vrev64q_u8(left_u_8x16x3.val[1]);
3107 revLeft_u_8x16x3.val[2] = vrev64q_u8(left_u_8x16x3.val[2]);
3108 revLeft_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[0]), vget_low_u8(revLeft_u_8x16x3.val[0]));
3109 revLeft_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[1]), vget_low_u8(revLeft_u_8x16x3.val[1]));
3110 revLeft_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[2]), vget_low_u8(revLeft_u_8x16x3.val[2]));
3112 uint8x16x3_t revRight_u_8x16x3;
3113 revRight_u_8x16x3.val[0] = vrev64q_u8(right_u_8x16x3.val[0]);
3114 revRight_u_8x16x3.val[1] = vrev64q_u8(right_u_8x16x3.val[1]);
3115 revRight_u_8x16x3.val[2] = vrev64q_u8(right_u_8x16x3.val[2]);
3116 revRight_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[0]), vget_low_u8(revRight_u_8x16x3.val[0]));
3117 revRight_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[1]), vget_low_u8(revRight_u_8x16x3.val[1]));
3118 revRight_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[2]), vget_low_u8(revRight_u_8x16x3.val[2]));
3120 vst3q_u8(left, revRight_u_8x16x3);
3121 vst3q_u8(right, revLeft_u_8x16x3);
3123 left += 16u * tChannels;
3124 right -= 16u * tChannels;
3127 n += blocks32 * 16u;
3134 for (
size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3136 const uint8x16x4_t left_u_8x16x4 = vld4q_u8(left);
3137 const uint8x16x4_t right_u_8x16x4 = vld4q_u8(right);
3139 uint8x16x4_t revLeft_u_8x16x4;
3140 revLeft_u_8x16x4.val[0] = vrev64q_u8(left_u_8x16x4.val[0]);
3141 revLeft_u_8x16x4.val[1] = vrev64q_u8(left_u_8x16x4.val[1]);
3142 revLeft_u_8x16x4.val[2] = vrev64q_u8(left_u_8x16x4.val[2]);
3143 revLeft_u_8x16x4.val[3] = vrev64q_u8(left_u_8x16x4.val[3]);
3144 revLeft_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[0]), vget_low_u8(revLeft_u_8x16x4.val[0]));
3145 revLeft_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[1]), vget_low_u8(revLeft_u_8x16x4.val[1]));
3146 revLeft_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[2]), vget_low_u8(revLeft_u_8x16x4.val[2]));
3147 revLeft_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[3]), vget_low_u8(revLeft_u_8x16x4.val[3]));
3149 uint8x16x4_t revRight_u_8x16x4;
3150 revRight_u_8x16x4.val[0] = vrev64q_u8(right_u_8x16x4.val[0]);
3151 revRight_u_8x16x4.val[1] = vrev64q_u8(right_u_8x16x4.val[1]);
3152 revRight_u_8x16x4.val[2] = vrev64q_u8(right_u_8x16x4.val[2]);
3153 revRight_u_8x16x4.val[3] = vrev64q_u8(right_u_8x16x4.val[3]);
3154 revRight_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[0]), vget_low_u8(revRight_u_8x16x4.val[0]));
3155 revRight_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[1]), vget_low_u8(revRight_u_8x16x4.val[1]));
3156 revRight_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[2]), vget_low_u8(revRight_u_8x16x4.val[2]));
3157 revRight_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[3]), vget_low_u8(revRight_u_8x16x4.val[3]));
3159 vst4q_u8(left, revRight_u_8x16x4);
3160 vst4q_u8(right, revLeft_u_8x16x4);
3162 left += 16u * tChannels;
3163 right -= 16u * tChannels;
3166 n += blocks32 * 16u;
3179 PixelType intermediate;
3181 PixelType*
const pixels = (PixelType*)(data);
3183 while (n < size / 2)
3185 intermediate = pixels[n];
3187 pixels[n] = pixels[size - n - 1];
3188 pixels[size - n - 1] = intermediate;
3194template <
typename T,
unsigned int tChannels>
3197 ocean_assert(source !=
nullptr && target !=
nullptr);
3198 ocean_assert(source != target);
3199 ocean_assert(size >= 1);
3202 const T*
const debugSourceStart = source;
3203 const T*
const debugSourceEnd = debugSourceStart + size * tChannels;
3205 const T*
const debugTargetStart = target;
3206 const T*
const debugTargetEnd = debugTargetStart + size * tChannels;
3209 if constexpr (tChannels == 1)
3213 memcpy(target, source,
sizeof(T) * size);
3217 const T*
const sourceEnd = source + size * tChannels;
3219#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
3223 const size_t blocks16 = size /
size_t(16);
3228 ocean_assert(
false &&
"This should have been handled above!");
3233 for (
size_t n = 0; n < blocks16; ++n)
3237 source += 16u * tChannels;
3238 target += 16u * tChannels;
3246 for (
size_t n = 0; n < blocks16; ++n)
3250 source += 16u * tChannels;
3251 target += 16u * tChannels;
3259 for (
size_t n = 0; n < blocks16; ++n)
3263 source += 16u * tChannels;
3264 target += 16u * tChannels;
3275#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3279 const size_t blocks16 = size /
size_t(16);
3284 ocean_assert(
false &&
"This should have been handled above!");
3289 for (
size_t n = 0; n < blocks16; ++n)
3291 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3292 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3294 const uint8x16_t sourceA_u_8x16 = vld1q_u8((
const uint8_t*)source + 0);
3295 const uint8x16_t sourceB_u_8x16 = vld1q_u8((
const uint8_t*)source + 16);
3297 const uint8x16_t revSourceA_u_8x16 = vrev16q_u8(sourceA_u_8x16);
3298 const uint8x16_t revSourceB_u_8x16 = vrev16q_u8(sourceB_u_8x16);
3300 vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3301 vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3303 source += 16u * tChannels;
3304 target += 16u * tChannels;
3312 for (
size_t n = 0; n < blocks16; ++n)
3314 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3315 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3317 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3319 uint8x16x3_t revSource_u_8x16x3;
3320 revSource_u_8x16x3.val[0] = source_u_8x16x3.val[2];
3321 revSource_u_8x16x3.val[1] = source_u_8x16x3.val[1];
3322 revSource_u_8x16x3.val[2] = source_u_8x16x3.val[0];
3324 vst3q_u8((uint8_t*)target, revSource_u_8x16x3);
3326 source += 16u * tChannels;
3327 target += 16u * tChannels;
3335 for (
size_t n = 0; n < blocks16; ++n)
3337 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3338 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3340 const uint8x16_t sourceA_u_8x16 = vld1q_u8((
const uint8_t*)source + 0);
3341 const uint8x16_t sourceB_u_8x16 = vld1q_u8((
const uint8_t*)source + 16);
3342 const uint8x16_t sourceC_u_8x16 = vld1q_u8((
const uint8_t*)source + 32);
3343 const uint8x16_t sourceD_u_8x16 = vld1q_u8((
const uint8_t*)source + 48);
3345 const uint8x16_t revSourceA_u_8x16 = vrev32q_u8(sourceA_u_8x16);
3346 const uint8x16_t revSourceB_u_8x16 = vrev32q_u8(sourceB_u_8x16);
3347 const uint8x16_t revSourceC_u_8x16 = vrev32q_u8(sourceC_u_8x16);
3348 const uint8x16_t revSourceD_u_8x16 = vrev32q_u8(sourceD_u_8x16);
3350 vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3351 vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3352 vst1q_u8((uint8_t*)target + 32, revSourceC_u_8x16);
3353 vst1q_u8((uint8_t*)target + 48, revSourceD_u_8x16);
3355 source += 16u * tChannels;
3356 target += 16u * tChannels;
3369 while (source != sourceEnd)
3371 ocean_assert(source < sourceEnd);
3373 ocean_assert(source >= debugSourceStart && source + tChannels <= debugSourceEnd);
3374 ocean_assert(target >= debugTargetStart && target + tChannels <= debugTargetEnd);
3376 for (
unsigned int n = 0u; n < tChannels; ++n)
3378 target[n] = source[tChannels - n - 1u];
3381 source += tChannels;
3382 target += tChannels;
3386template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
3389 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
3390 static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u,
"Invalid channel number!");
3392 static_assert(tSourceChannels != 1u || tTargetChannels != 1u,
"Invalid channel number!");
3394 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
3395 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
3396 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
3397 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
3398 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
3399 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
3400 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
3401 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
3403 ocean_assert(source !=
nullptr && target !=
nullptr);
3404 ocean_assert(size != 0);
3406 const T*
const sourceEnd = source + size * tSourceChannels;
3408#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
3412 const size_t blocks16 = size /
size_t(16);
3414 switch (tSourceChannels | ((tTargetChannels) << 4u))
3417 case (4u | (4u << 4u)):
3421 constexpr unsigned int offset1 = 0x04040404u;
3422 constexpr unsigned int offset2 = 0x08080808u;
3423 constexpr unsigned int offset3 = 0x0C0C0C0Cu;
3426 const unsigned int shufflePattern0 = ((tShufflePattern & 0xF000u) << 12u) | ((tShufflePattern & 0x0F00u) << 8u) | ((tShufflePattern & 0x00F0u) << 4u) | ((tShufflePattern & 0x000Fu) << 0u);
3428 const unsigned int shufflePattern1 = shufflePattern0 + offset1;
3429 const unsigned int shufflePattern2 = shufflePattern0 + offset2;
3430 const unsigned int shufflePattern3 = shufflePattern0 + offset3;
3432 const __m128i shufflePattern128 =
SSE::set128i((((
unsigned long long)shufflePattern3) << 32ull) | (
unsigned long long)shufflePattern2, (((
unsigned long long)shufflePattern1) << 32ull) | (
unsigned long long)shufflePattern0);
3434 for (
size_t n = 0; n < blocks16; ++n)
3441 source += 16u * tSourceChannels;
3442 target += 16u * tTargetChannels;
3454#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3458 const size_t blocks16 = size /
size_t(16);
3460 switch (tSourceChannels | ((tTargetChannels) << 4u))
3463 case (1u | (3u << 4u)):
3465 static_assert(tSourceChannels != 1u || tShufflePattern == 0u,
"Invalid shuffle patter!");
3467 for (
size_t n = 0; n < blocks16; ++n)
3469 const uint8x16_t source_u_8x16 = vld1q_u8((
const uint8_t*)source);
3471 uint8x16x3_t target_u_8x16x3;
3473 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3475 target_u_8x16x3.val[nT] = source_u_8x16;
3478 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3480 source += 16u * tSourceChannels;
3481 target += 16u * tTargetChannels;
3488 case (2u | (1u << 4u)):
3490 for (
size_t n = 0; n < blocks16; ++n)
3492 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((
const uint8_t*)source);
3494 constexpr unsigned int sourceChannel = tShufflePattern & 0x00000001u;
3495 static_assert(sourceChannel <= 1u,
"Invalid shuffle pattern!");
3496 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3498 const uint8x16_t target_u_8x16 = source_u_8x16x2.val[sourceChannel];
3500 vst1q_u8((uint8_t*)target, target_u_8x16);
3502 source += 16u * tSourceChannels;
3503 target += 16u * tTargetChannels;
3510 case (2u | (3u << 4u)):
3512 for (
size_t n = 0; n < blocks16; ++n)
3514 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((
const uint8_t*)source);
3516 uint8x16x3_t target_u_8x16x3;
3518 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3520 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3522 target_u_8x16x3.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u];
3525 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3527 source += 16u * tSourceChannels;
3528 target += 16u * tTargetChannels;
3535 case (2u | (4u << 4u)):
3537 for (
size_t n = 0; n < blocks16; ++n)
3539 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((
const uint8_t*)source);
3541 uint8x16x4_t target_u_8x16x4;
3543 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3545 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3547 target_u_8x16x4.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u];
3550 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3552 source += 16u * tSourceChannels;
3553 target += 16u * tTargetChannels;
3560 case (3u | (1u << 4u)):
3562 constexpr unsigned int sourceChannel = (tShufflePattern & 0x0000000Fu) <= 2u ? (tShufflePattern & 0x0000000Fu) : 2u;
3563 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3565 for (
size_t n = 0; n < blocks16; ++n)
3567 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3569 const uint8x16_t target_u_8x16 = source_u_8x16x3.val[sourceChannel];
3571 vst1q_u8((uint8_t*)target, target_u_8x16);
3573 source += 16u * tSourceChannels;
3574 target += 16u * tTargetChannels;
3581 case (3u | (2u << 4u)):
3583 for (
size_t n = 0; n < blocks16; ++n)
3585 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3587 uint8x16x2_t target_u_8x16x2;
3589 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3591 target_u_8x16x2.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)];
3594 vst2q_u8((uint8_t*)target, target_u_8x16x2);
3596 source += 16u * tSourceChannels;
3597 target += 16u * tTargetChannels;
3604 case (3u | (3u << 4u)):
3606 for (
size_t n = 0; n < blocks16; ++n)
3608 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3610 uint8x16x3_t target_u_8x16x3;
3612 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3614 target_u_8x16x3.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)];
3617 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3619 source += 16u * tSourceChannels;
3620 target += 16u * tTargetChannels;
3627 case (4u | (1u << 4u)):
3629 for (
size_t n = 0; n < blocks16; ++n)
3631 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3633 constexpr unsigned int sourceChannel = tShufflePattern & 0x00000003u;
3634 static_assert(sourceChannel <= 3u,
"Invalid shuffle pattern!");
3636 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3638 const uint8x16_t target_u_8x16 = source_u_8x16x4.val[sourceChannel];
3640 vst1q_u8((uint8_t*)target, target_u_8x16);
3642 source += 16u * tSourceChannels;
3643 target += 16u * tTargetChannels;
3650 case (4u | (2u << 4u)):
3652 for (
size_t n = 0; n < blocks16; ++n)
3654 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3656 uint8x16x2_t target_u_8x16x2;
3658 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3660 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3662 target_u_8x16x2.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u];
3665 vst2q_u8((uint8_t*)target, target_u_8x16x2);
3667 source += 16u * tSourceChannels;
3668 target += 16u * tTargetChannels;
3675 case (4u | (3u << 4u)):
3677 for (
size_t n = 0; n < blocks16; ++n)
3679 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3681 uint8x16x3_t target_u_8x16x3;
3683 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3685 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3687 target_u_8x16x3.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u];
3690 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3692 source += 16u * tSourceChannels;
3693 target += 16u * tTargetChannels;
3700 case (4u | (4u << 4u)):
3702 for (
size_t n = 0; n < blocks16; ++n)
3704 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3706 uint8x16x4_t target_u_8x16x4;
3708 for (
unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3710 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3712 target_u_8x16x4.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u];
3715 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3717 source += 16u * tSourceChannels;
3718 target += 16u * tTargetChannels;
3732 while (source != sourceEnd)
3734 ocean_assert(source < sourceEnd);
3736 for (
unsigned int n = 0u; n < tTargetChannels; ++n)
3738 target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
3741 source += tSourceChannels;
3742 target += tTargetChannels;
3746template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
3749 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
3750 static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u,
"Invalid channel number!");
3752 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
3753 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
3754 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
3755 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
3756 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
3757 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
3758 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
3759 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
3761 ocean_assert(source !=
nullptr && target !=
nullptr);
3762 ocean_assert(size != 0);
3764 ocean_assert(options !=
nullptr);
3766 const T lastChannelValue = *(
const T*)(options);
3768 const T*
const sourceEnd = source + size * tSourceChannels;
3770#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3774 const size_t blocks16 = size /
size_t(16);
3776 switch (tSourceChannels | ((tTargetChannels) << 4u))
3779 case (1u | (4u << 4u)):
3781 ocean_assert(tShufflePattern == 0u);
3783 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3785 uint8x16x4_t target_u_8x16x4;
3786 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3788 for (
size_t n = 0; n < blocks16; ++n)
3790 const uint8x16_t source_u_8x16 = vld1q_u8((
const uint8_t*)source);
3792 for (
unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3794 target_u_8x16x4.val[nT] = source_u_8x16;
3797 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3799 source += 16u * tSourceChannels;
3800 target += 16u * tTargetChannels;
3807 case (3u | (4u << 4u)):
3809 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3811 uint8x16x4_t target_u_8x16x4;
3812 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3814 for (
size_t n = 0; n < blocks16; ++n)
3816 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((
const uint8_t*)source);
3818 for (
unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3820 target_u_8x16x4.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)];
3823 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3825 source += 16u * tSourceChannels;
3826 target += 16u * tTargetChannels;
3833 case (4u | (4u << 4u)):
3835 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3837 uint8x16x4_t target_u_8x16x4;
3838 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3840 for (
size_t n = 0; n < blocks16; ++n)
3842 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((
const uint8_t*)source);
3844 for (
unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3846 target_u_8x16x4.val[nT] = source_u_8x16x4.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 3u)];
3849 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3851 source += 16u * tSourceChannels;
3852 target += 16u * tTargetChannels;
3866 while (source != sourceEnd)
3868 ocean_assert(source < sourceEnd);
3870 for (
unsigned int n = 0u; n < tTargetChannels - 1u; ++n)
3872 target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
3873 target[tTargetChannels - 1u] = lastChannelValue;
3876 source += tSourceChannels;
3877 target += tTargetChannels;
// Frame-level channel shuffle (the signature line of this definition is not visible in this excerpt).
// Validates the compile-time channel counts and the shuffle pattern, derives the row strides from the
// padding values, and forwards the work to FrameConverter::convertGenericPixelFormat() with
// FrameChannels::shuffleRowChannels<> as the per-row function.
3881template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
// Between 1 and 8 channels are accepted on either side; a 1 -> 1 "shuffle" is rejected.
3884 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
3885 static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u,
"Invalid channel number!");
3887 static_assert(tSourceChannels != 1u || tTargetChannels != 1u,
"Invalid channel number!");
// Each of the eight 4-bit nibbles of tShufflePattern must be a valid source channel index.
3889 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
3890 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
3891 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
3892 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
3893 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
3894 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
3895 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
3896 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
3898 ocean_assert(source !=
nullptr && target !=
nullptr);
3899 ocean_assert(width >= 1u && height >= 1u);
// Stride = payload elements of one row plus the optional padding elements at the end of the row.
3901 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
3902 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
// True only if neither frame has row padding.
3904 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// No options are needed for the plain shuffle, hence the nullptr.
// NOTE(review): reverseRowPixelOrderInPlace<> is presumably used for mirroring conversion flags - confirm in FrameConverter.
3906 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous,
nullptr, worker);
// Frame-level channel shuffle which additionally sets the last target channel to a constant value
// (the signature line of this definition is not visible in this excerpt).
// The work is forwarded to FrameConverter::convertGenericPixelFormat() with
// FrameChannels::shuffleRowChannelsAndSetLastChannelValue<> as the per-row function.
3909template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tShufflePattern>
3912 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u,
"Invalid channel number!");
// At least two target channels are required: shuffled channel(s) plus the constant last channel.
3913 static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u,
"Invalid channel number!");
// Each of the eight 4-bit nibbles of tShufflePattern must be a valid source channel index.
3915 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels,
"Invalid shuffle pattern!");
3916 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels,
"Invalid shuffle pattern!");
3917 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels,
"Invalid shuffle pattern!");
3918 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels,
"Invalid shuffle pattern!");
3919 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels,
"Invalid shuffle pattern!");
3920 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels,
"Invalid shuffle pattern!");
3921 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels,
"Invalid shuffle pattern!");
3922 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels,
"Invalid shuffle pattern!");
3924 ocean_assert(source !=
nullptr && target !=
nullptr);
3925 ocean_assert(width >= 1u && height >= 1u);
// Stride = payload elements of one row plus the optional padding elements at the end of the row.
3927 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
3928 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
// True only if neither frame has row padding.
3930 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// The constant channel value is handed to the row function by address; the row function reads it back as a T.
3932 const T options = newChannelValue;
3934 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannelsAndSetLastChannelValue<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous, &options, worker);
// Frame-level narrowing of 16-bit channels to 8-bit channels (the signature line of this definition is
// not visible in this excerpt). Source and target have the same channel count; the work is forwarded to
// FrameConverter::convertGenericPixelFormat() with narrowRow16BitPerChannelTo8BitPerChannel<> as the
// per-row function.
3937template <
unsigned int tChannels>
3940 static_assert(tChannels >= 1u,
"Invalid channel number!");
3942 ocean_assert(source !=
nullptr && target !=
nullptr);
3943 ocean_assert(width >= 1u && height >= 1u);
// Strides are measured in elements of the respective frame (16-bit elements for source, 8-bit for target).
3945 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
3946 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
// True only if neither frame has row padding.
3948 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
// The row function needs no options, hence the nullptr.
3950 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::narrowRow16BitPerChannelTo8BitPerChannel<tChannels>, FrameChannels::reverseRowPixelOrderInPlace<uint8_t, tChannels>, areContinuous,
nullptr, worker);
// Applies tPixelFunction to every pixel of a frame by running applyPixelModifierSubset<> over all rows
// (the signature line of this definition is not visible in this excerpt).
// Two call forms are present: one dispatched through worker->executeFunction() and one direct call
// covering rows [0, height).
// NOTE(review): the condition selecting between the two calls is not visible here - presumably a null
// check on worker; confirm.
3953template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
3956 static_assert(tChannels > 0u,
"Invalid channel number!");
3958 ocean_assert(source && target);
3959 ocean_assert(width != 0u && height != 0u);
// The two trailing 0u arguments of createStatic() occupy the firstRow/numberRows slots of the subset function.
// NOTE(review): presumably the worker overwrites them per thread while splitting the range [0, height) - confirm.
3962 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyPixelModifierSubset<T, tChannels, tPixelFunction>, source, target, width, height, conversionFlag, 0u, 0u), 0u, height);
// Direct call: one invocation handling all rows.
3964 applyPixelModifierSubset<T, tChannels, tPixelFunction>(source, target, width, height, conversionFlag, 0u, height);
// Applies tPixelFunction to every pixel of a frame, allowing different element types and channel counts
// for source and target, by running applyAdvancedPixelModifierSubset<> over all rows
// (the signature line of this definition is not visible in this excerpt).
// NOTE(review): the condition selecting between the worker call and the direct call is not visible
// here - presumably a null check on worker; confirm.
3968template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
3971 static_assert(tSourceChannels > 0u,
"Invalid source channel number!");
3972 static_assert(tTargetChannels > 0u,
"Invalid target channel number!");
3974 ocean_assert(source && target);
3975 ocean_assert(width != 0u && height != 0u);
// The two trailing 0u arguments of createStatic() occupy the firstRow/numberRows slots of the subset function.
// NOTE(review): presumably the worker overwrites them per thread while splitting the range [0, height) - confirm.
3979 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>, source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
// Direct call: one invocation handling all rows.
3983 applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>(source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, height);
3987template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
3988void FrameChannels::applyBivariateOperator(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
Worker* worker)
3990 static_assert(tSourceChannels > 0u,
"Invalid source channel number!");
3991 static_assert(tTargetChannels > 0u,
"Invalid target channel number!");
3993 ocean_assert(source0 && source1 && target);
3994 ocean_assert(width != 0u && height != 0u);
3998 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>, source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
4002 FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>(source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, height);
4006template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
4007void FrameChannels::applyRowOperator(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction,
Worker* worker)
4009 static_assert(tSourceChannels > 0u,
"Invalid source channel number!");
4010 static_assert(tTargetChannels > 0u,
"Invalid target channel number!");
4012 ocean_assert(source !=
nullptr && target !=
nullptr);
4013 ocean_assert(width != 0u && height != 0u);
4015 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
4016 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
4020 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>, source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, 0u), 0u, height);
4024 applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>(source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, height);
// Generic frame transformation working on raw bytes (the signature line of this definition is not
// visible in this excerpt). Row sizes and strides are converted from elements to bytes, and the rows are
// processed by transformGenericSubset(), either through the worker or in the calling thread.
4028template <
typename T,
unsigned int tChannels>
4031 ocean_assert(source !=
nullptr && target !=
nullptr);
4032 ocean_assert(width >= 1u && height >= 1u);
// Number of payload bytes in one row (padding excluded).
4034 const unsigned int bytesPerRow = width *
sizeof(T) * tChannels;
// Byte distance between the starts of two consecutive rows (payload plus padding).
4036 const unsigned int sourceStrideBytes = width *
sizeof(T) * tChannels +
sizeof(T) * sourcePaddingElements;
4037 const unsigned int targetStrideBytes = width *
sizeof(T) * tChannels +
sizeof(T) * targetPaddingElements;
// The worker is used only for frames with more than 200 rows.
4043 if (worker && height > 200u)
// 9u and 10u are the zero-based positions of the two trailing 0u placeholder arguments (firstRow, numberRows).
// NOTE(review): 20u is presumably the minimal number of rows per worker invocation - confirm against Worker::executeFunction.
4045 worker->
executeFunction(
Worker::Function::createStatic(&
FrameChannels::transformGenericSubset, (
const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, 0u), 0u, height, 9u, 10u, 20u);
// Direct call: one invocation handling all rows [0, height).
4049 transformGenericSubset((
const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, height);
// In-place variant: converts a frame from premultiplied alpha to straight alpha by running
// premultipliedAlphaToStraightAlpha8BitPerChannelSubset<> over all rows
// (the signature line of this definition is not visible in this excerpt).
4053template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// At least one data channel plus the alpha channel is required, and the alpha index must address an existing channel.
4056 static_assert(tChannels >= 2u,
"Invalid channel number!");
4057 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4059 ocean_assert(frame !=
nullptr);
4060 ocean_assert(width >= 1u && height >= 1u);
// The worker is used only for frames with more than 200 rows.
4062 if (worker && height > 200u)
// 3u and 4u are the zero-based positions of the two trailing 0u placeholder arguments (firstRow, numberRows).
// NOTE(review): 20u is presumably the minimal number of rows per worker invocation - confirm against Worker::executeFunction.
4064 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
// Direct call: one invocation handling all rows [0, height).
4068 premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
// Source/target variant: converts a frame from premultiplied alpha to straight alpha, writing the result
// to a separate target frame, by running premultipliedAlphaToStraightAlpha8BitPerChannelSubset<> over
// all rows (the signature line of this definition is not visible in this excerpt).
4072template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// At least one data channel plus the alpha channel is required, and the alpha index must address an existing channel.
4075 static_assert(tChannels >= 2u,
"Invalid channel number!");
4076 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4078 ocean_assert(source !=
nullptr && target !=
nullptr);
4079 ocean_assert(width >= 1u && height >= 1u);
// The worker is used only for frames with more than 200 rows.
4081 if (worker && height > 200u)
// 5u and 6u are the zero-based positions of the two trailing 0u placeholder arguments (firstRow, numberRows).
// NOTE(review): 20u is presumably the minimal number of rows per worker invocation - confirm against Worker::executeFunction.
4083 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
// Direct call: one invocation handling all rows [0, height).
4087 premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
// In-place variant: converts a frame from straight alpha to premultiplied alpha by running
// straightAlphaToPremultipliedAlpha8BitPerChannelSubset<> over all rows
// (the signature line of this definition is not visible in this excerpt).
4091template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// At least one data channel plus the alpha channel is required, and the alpha index must address an existing channel.
4094 static_assert(tChannels >= 2u,
"Invalid channel number!");
4095 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4097 ocean_assert(frame !=
nullptr);
4098 ocean_assert(width >= 1u && height >= 1u);
// The worker is used only for frames with more than 200 rows.
4100 if (worker && height > 200u)
// 3u and 4u are the zero-based positions of the two trailing 0u placeholder arguments (firstRow, numberRows).
// NOTE(review): 20u is presumably the minimal number of rows per worker invocation - confirm against Worker::executeFunction.
4102 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
// Direct call: one invocation handling all rows [0, height).
4106 straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
// Source/target variant: converts a frame from straight alpha to premultiplied alpha, writing the result
// to a separate target frame, by running straightAlphaToPremultipliedAlpha8BitPerChannelSubset<> over
// all rows (the signature line of this definition is not visible in this excerpt).
4110template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
// At least one data channel plus the alpha channel is required, and the alpha index must address an existing channel.
4113 static_assert(tChannels >= 2u,
"Invalid channel number!");
4114 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
4116 ocean_assert(source !=
nullptr && target !=
nullptr);
4117 ocean_assert(width >= 1u && height >= 1u);
// The worker is used only for frames with more than 200 rows.
4119 if (worker && height > 200u)
// 5u and 6u are the zero-based positions of the two trailing 0u placeholder arguments (firstRow, numberRows).
// NOTE(review): 20u is presumably the minimal number of rows per worker invocation - confirm against Worker::executeFunction.
4121 worker->
executeFunction(
Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
// Direct call: one invocation handling all rows [0, height).
4125 straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
4129template <
unsigned int tChannels>
4132 static_assert(tChannels >= 1u,
"Invalid channel number!");
4134 ocean_assert(source !=
nullptr && target !=
nullptr);
4135 ocean_assert(size > 0);
4137 const uint16_t*
const sourceEnd = source + size * tChannels;
4139#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4141 const size_t blocks8 = size /
size_t(8);
4147 for (
size_t n = 0; n < blocks8; ++n)
4149 const uint16x8_t sourceA_u_16x8 = vld1q_u16(source + 0);
4150 const uint16x8_t sourceB_u_16x8 = vld1q_u16(source + 8);
4151 const uint16x8_t sourceC_u_16x8 = vld1q_u16(source + 16);
4152 const uint16x8_t sourceD_u_16x8 = vld1q_u16(source + 24);
4154 const uint8x16_t targetAB_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceA_u_16x8, 8), vqrshrn_n_u16(sourceB_u_16x8, 8));
4155 const uint8x16_t targetCD_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceC_u_16x8, 8), vqrshrn_n_u16(sourceD_u_16x8, 8));
4157 vst1q_u8(target + 0, targetAB_u_8x16);
4158 vst1q_u8(target + 16, targetCD_u_8x16);
4160 source += 8u * tChannels;
4161 target += 8u * tChannels;
4173 while (source != sourceEnd)
4175 ocean_assert(source < sourceEnd);
4177 for (
unsigned int n = 0u; n < tChannels; ++n)
4179 ocean_assert((uint16_t)(source[n] >> 8u) <= 255u);
4180 target[n] = (uint8_t)(source[n] >> 8u);
4183 source += tChannels;
4184 target += tChannels;
// Row function which merges one row of a multi-channel source frame with the corresponding row of a
// one-channel frame into a target row having tSourceChannels + 1 channels
// (the signature line of this definition is not visible in this excerpt).
// With tAddToFront the extra channel becomes the first target channel, otherwise the last one.
4188template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
4191 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
4192 static_assert(
sizeof(
size_t) ==
sizeof(
const T*),
"Invalid pointer size!");
4194 ocean_assert(sources !=
nullptr && targets !=
nullptr);
4195 ocean_assert(width != 0u && height != 0u);
4196 ocean_assert(multipleRowIndex < height);
4197 ocean_assert(options !=
nullptr);
// sources[0] is the multi-channel frame, sources[1] the one-channel frame.
4199 const T* source = (
const T*)(sources[0]);
4200 const T* sourceOneChannel = (
const T*)(sources[1]);
4201 ocean_assert(source !=
nullptr && sourceOneChannel !=
nullptr);
4203 T* target = (T*)(targets[0]);
4204 ocean_assert(target !=
nullptr);
// The options are read as three unsigned int padding values: source, one-channel source, target.
4206 const unsigned int* uintOptions = (
const unsigned int*)options;
4207 ocean_assert(uintOptions !=
nullptr);
4209 const unsigned int sourcePaddingElements = uintOptions[0];
4210 const unsigned int sourceOneChannelPaddingElements = uintOptions[1];
4211 const unsigned int targetPaddingElements = uintOptions[2];
4213 const unsigned int targetChannels = tSourceChannels + 1u;
// Strides in elements: payload of one row plus padding.
4215 const unsigned int sourceStrideElements = tSourceChannels * width + sourcePaddingElements;
4216 const unsigned int sourceOneChannelStrideElements = width + sourceOneChannelPaddingElements;
4217 const unsigned int targetStrideElements = targetChannels * width + targetPaddingElements;
// NOTE(review): flipTarget and mirrorTarget are used below but defined on lines not visible in this
// excerpt - presumably derived from the conversion flag; confirm.
4222 const T* sourceRow = source + sourceStrideElements * multipleRowIndex;
4223 const T* sourceOneChannelRow = sourceOneChannel + sourceOneChannelStrideElements * multipleRowIndex;
// When flipping, the row is written to the vertically opposite target row.
4224 T* targetRow = flipTarget ? target + targetStrideElements * (height - multipleRowIndex - 1u) : target + targetStrideElements * multipleRowIndex;
// Non-mirrored case: source and target pixels are both traversed left to right.
4226 if (mirrorTarget ==
false)
4228 for (
unsigned int n = 0u; n < width; ++n)
4230 if constexpr (tAddToFront)
4232 targetRow[0] = sourceOneChannelRow[0];
4234 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4236 targetRow[c + 1u] = sourceRow[c];
// Extra channel appended behind the copied source channels (the counterpart branch of tAddToFront).
4241 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4243 targetRow[c] = sourceRow[c];
4246 targetRow[tSourceChannels] = sourceOneChannelRow[0];
4249 sourceRow += tSourceChannels;
4250 sourceOneChannelRow++;
4252 targetRow += targetChannels;
// Mirrored case: the target pointer starts at the last pixel of the row and moves towards the first one,
// while both sources are still traversed left to right.
4257 targetRow += targetChannels * (width - 1u);
4259 for (
unsigned int n = 0u; n < width; ++n)
4261 if constexpr (tAddToFront)
4263 targetRow[0] = sourceOneChannelRow[0];
4265 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4267 targetRow[c + 1u] = sourceRow[c];
// Extra channel appended behind the copied source channels (the counterpart branch of tAddToFront).
4272 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4274 targetRow[c] = sourceRow[c];
4277 targetRow[tSourceChannels] = sourceOneChannelRow[0];
4280 sourceRow += tSourceChannels;
4281 sourceOneChannelRow++;
4283 targetRow -= targetChannels;
// Row function which copies every source pixel to the target and adds one extra channel holding a
// constant value, so the target has tSourceChannels + 1 channels
// (the signature line of this definition is not visible in this excerpt).
// With tAddToFront the constant becomes the first target channel, otherwise the last one.
4288template <
typename T,
unsigned int tSourceChannels,
bool tAddToFront>
4291 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
4293 ocean_assert(source !=
nullptr && target !=
nullptr);
4294 ocean_assert(size > 0);
4295 ocean_assert(channelValueParameter !=
nullptr);
// The constant channel value arrives through an untyped pointer and is read as a T.
4297 const T& channelValue = *((
const T*)channelValueParameter);
4299 const unsigned int targetChannels = tSourceChannels + 1u;
// One iteration per pixel.
4301 for (
size_t n = 0; n < size; ++n)
4303 if constexpr (tAddToFront)
4305 target[0] = channelValue;
4307 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4309 target[c + 1u] = source[c];
// Constant appended behind the copied source channels (the counterpart branch of tAddToFront).
4314 for (
unsigned int c = 0u; c < tSourceChannels; ++c)
4316 target[c] = source[c];
4319 target[tSourceChannels] = channelValue;
4322 source += tSourceChannels;
4323 target += targetChannels;
// Row function which copies exactly one channel of every source pixel into one channel of the
// corresponding target pixel; all other target channels are left untouched
// (the signature line of this definition is not visible in this excerpt).
4327template <
typename T,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
unsigned int tSourceChannelIndex,
unsigned int tTargetChannelIndex>
4330 static_assert(tSourceChannels != 0u,
"Invalid channel number!");
4331 static_assert(tTargetChannels != 0u,
"Invalid channel number!");
// Both channel indices must address an existing channel of their frame.
4333 static_assert(tSourceChannelIndex < tSourceChannels,
"Invalid channel number!");
4334 static_assert(tTargetChannelIndex < tTargetChannels,
"Invalid channel number!");
4336 ocean_assert(source !=
nullptr && target !=
nullptr);
4337 ocean_assert(size > 0);
// One iteration per pixel; the pointers advance by one full pixel of their respective frame.
4339 for (
size_t n = 0; n < size; ++n)
4341 target[tTargetChannelIndex] = source[tSourceChannelIndex];
4343 source += tSourceChannels;
4344 target += tTargetChannels;
4348template <
typename TSource,
typename TTarget>
4349void FrameChannels::separateTo1ChannelRuntime(
const TSource*
const sourceFrame, TTarget*
const*
const targetFrames,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourceFramePaddingElements,
const unsigned int* targetFramesPaddingElements)
4351 ocean_assert(sourceFrame !=
nullptr);
4352 ocean_assert(targetFrames !=
nullptr);
4354 ocean_assert(width != 0u && height != 0u);
4355 ocean_assert(channels != 0u);
4358 for (
unsigned int c = 0u; c < channels; ++c)
4360 ocean_assert(targetFrames[c] !=
nullptr);
4364 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements ==
nullptr)
4366 for (
unsigned int n = 0u; n < width * height; ++n)
4368 for (
unsigned int c = 0u; c < channels; ++c)
4370 targetFrames[c][n] = TTarget(sourceFrame[n * channels + c]);
4374 else if (targetFramesPaddingElements ==
nullptr)
4376 ocean_assert(sourceFramePaddingElements != 0u);
4378 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4380 for (
unsigned int y = 0u; y < height; ++y)
4382 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4384 const unsigned int targetRowOffset = y * width;
4386 for (
unsigned int x = 0u; x < width; ++x)
4388 for (
unsigned int c = 0u; c < channels; ++c)
4390 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * channels + c));
4397 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4399 Indices32 targetFrameStrideElements(channels);
4401 for (
unsigned int c = 0u; c < channels; ++c)
4403 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
4406 for (
unsigned int y = 0u; y < height; ++y)
4408 const TSource*
const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4410 for (
unsigned int x = 0u; x < width; ++x)
4412 for (
unsigned int c = 0u; c < channels; ++c)
4414 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * channels + c));
4421template <
typename TSource,
typename TTarget>
4422void FrameChannels::zipChannelsRuntime(
const TSource*
const* sourceFrames, TTarget*
const targetFrame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int* sourceFramesPaddingElements,
const unsigned int targetFramePaddingElements)
4424 ocean_assert(sourceFrames !=
nullptr);
4425 ocean_assert(targetFrame !=
nullptr);
4427 ocean_assert(width != 0u && height != 0u);
4428 ocean_assert(channels != 0u);
4430 bool allSourceFramesContinuous =
true;
4432 if (sourceFramesPaddingElements !=
nullptr)
4434 for (
unsigned int n = 0u; n < channels; ++n)
4436 if (sourceFramesPaddingElements[n] != 0u)
4438 allSourceFramesContinuous =
false;
4444 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
4446 for (
unsigned int n = 0u; n < width * height; ++n)
4448 for (
unsigned int c = 0u; c < channels; ++c)
4450 targetFrame[n * channels + c] = TTarget(sourceFrames[c][n]);
4456 const unsigned int targetFrameStrideElements = width * channels + targetFramePaddingElements;
4458 Indices32 sourceFrameStrideElements(channels);
4460 for (
unsigned int c = 0u; c < channels; ++c)
4462 if (sourceFramesPaddingElements ==
nullptr)
4464 sourceFrameStrideElements[c] = width;
4468 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
4472 for (
unsigned int y = 0u; y < height; ++y)
4474 TTarget*
const targetRow = targetFrame + y * targetFrameStrideElements;
4476 for (
unsigned int x = 0u; x < width; ++x)
4478 for (
unsigned int c = 0u; c < channels; ++c)
4480 *(targetRow + x * channels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
4487template <
typename T,
unsigned int tChannel,
unsigned int tChannels>
4488void FrameChannels::setChannelSubset(T* frame,
const unsigned int width,
const T value,
const unsigned int framePaddingElements,
const unsigned int firstRow,
const unsigned int numberRows)
4490 static_assert(tChannels >= 1u,
"Invalid channel number!");
4491 static_assert(tChannel < tChannels,
"Invalid channel index!");
4493 ocean_assert(frame !=
nullptr);
4495 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
4497 frame += firstRow * frameStrideElements + tChannel;
4499 for (
unsigned int n = 0u; n < numberRows; ++n)
4501 for (
unsigned int x = 0u; x < width; ++x)
4503 frame[x * tChannels] = value;
4506 frame += frameStrideElements;
// Applies tPixelFunction to every pixel in a subset of the rows of a frame
// (the signature line of this definition is not visible in this excerpt).
// Rows are addressed with a stride of width * tChannels elements, i.e. no row padding is taken into account.
// NOTE(review): the case labels of the switch are not visible in this excerpt; the four sections below
// are described by what their pointer arithmetic does.
4510template <
typename T,
unsigned int tChannels,
void (*tPixelFunction)(const T*, T*)>
4513 static_assert(tChannels >= 1u,
"Invalid channel number");
4515 ocean_assert(source && target);
4516 ocean_assert(source != target);
4518 ocean_assert(numberRows > 0u);
4519 ocean_assert(firstRow + numberRows <= height);
// Elements per row, and elements in the entire block of rows handled by this call.
4521 const unsigned int widthElements = width * tChannels;
4522 const unsigned int targetBlockSize = widthElements * numberRows;
4524 switch (conversionFlag)
// Section 1: target row order and pixel order are identical to the source - one linear pass over the block.
4528 source += firstRow * widthElements;
4529 target += firstRow * widthElements;
4531 const T*
const targetEnd = target + targetBlockSize;
4533 while (target != targetEnd)
4535 tPixelFunction(source, target);
4537 source += tChannels;
4538 target += tChannels;
// Section 2: rows are written in reverse vertical order. The target starts at the beginning of row
// (height - firstRow - 1); each row is written left to right.
4546 source += firstRow * widthElements;
4547 target += width * height * tChannels - (firstRow + 1u) * widthElements;
4549 const T*
const targetEnd = target - targetBlockSize;
4551 while (target != targetEnd)
4553 const T*
const targetRowEnd = target + widthElements;
4555 while (target != targetRowEnd)
4557 tPixelFunction(source, target);
4559 source += tChannels;
4560 target += tChannels;
// The pointer is at the end of the row just written; two row widths back is the start of the previous row.
4563 target -= (widthElements << 1);
// Section 3: pixels within each row are written in reverse horizontal order. The target starts at the
// end of row firstRow and is decremented before each write.
4571 source += firstRow * widthElements;
4572 target += (firstRow + 1u) * widthElements;
4574 const T*
const targetEnd = target + targetBlockSize;
4576 while (target != targetEnd)
4578 const T*
const targetRowEnd = target - widthElements;
4580 while (target != targetRowEnd)
4582 tPixelFunction(source, target -= tChannels);
4584 source += tChannels;
// The pointer is at the start of the row just written; two row widths forward is the end of the next row.
4587 target += widthElements << 1;
// Section 4: both row order and pixel order are reversed - one linear backward pass over the target,
// starting at the end of row (height - firstRow - 1).
4595 source += firstRow * widthElements;
4596 target += width * height * tChannels - firstRow * widthElements;
4598 const T*
const targetEnd = target - targetBlockSize;
4600 while (target != targetEnd)
4602 tPixelFunction(source, target -= tChannels);
4604 source += tChannels;
// Applies tPixelFunction to every pixel in the rows [firstRow, firstRow + numberRows) of a frame.
// Source and target may differ in element type and channel count, and both may have row padding.
// NOTE(review): the case labels of the switch are not visible in this excerpt; the four sections below
// are described by what their pointer arithmetic does.
4614template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tPixelFunction)(const TSource*, TTarget*)>
4615void FrameChannels::applyAdvancedPixelModifierSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows)
4617 static_assert(tSourceChannels >= 1u,
"Invalid source channel number");
4618 static_assert(tTargetChannels >= 1u,
"Invalid target channel number");
4620 ocean_assert(source && target);
// In-place operation is not supported.
4621 ocean_assert((
void*)source != (
void*)target);
4623 ocean_assert(numberRows != 0u);
4624 ocean_assert(firstRow + numberRows <= height);
// Payload elements per row (without padding).
4626 const unsigned int sourceWidthElements = width * tSourceChannels;
4627 const unsigned int targetWidthElements = width * tTargetChannels;
// Stride = payload elements plus padding elements.
4629 const unsigned int sourceStrideElements = sourceWidthElements + sourcePaddingElements;
4630 const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4632 switch (conversionFlag)
// Section 1: same row, same pixel order.
4636 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4638 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4639 TTarget* targetPixel = target + rowIndex * targetStrideElements;
4641 for (
unsigned int x = 0u; x < width; ++x)
4643 tPixelFunction(sourcePixel, targetPixel);
4645 sourcePixel += tSourceChannels;
4646 targetPixel += tTargetChannels;
// Section 2: source row rowIndex is written to target row (height - rowIndex - 1), same pixel order.
4655 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4657 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4658 TTarget* targetPixel = target + (height - rowIndex - 1u) * targetStrideElements;
4660 for (
unsigned int x = 0u; x < width; ++x)
4662 tPixelFunction(sourcePixel, targetPixel);
4664 sourcePixel += tSourceChannels;
4665 targetPixel += tTargetChannels;
// Section 3: same row, but the target pixels are written from the last pixel of the row to the first.
4674 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4676 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4678 TTarget*
const targetRowBegin = target + rowIndex * targetStrideElements;
// Points to the last pixel of the target row.
4679 TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4681 for (
unsigned int x = 0u; x < width; ++x)
4683 ocean_assert(targetPixel >= targetRowBegin);
4684 tPixelFunction(sourcePixel, targetPixel);
4686 sourcePixel += tSourceChannels;
4687 targetPixel -= tTargetChannels;
// Section 4: source row rowIndex is written to target row (height - rowIndex - 1), and the target
// pixels are written from the last pixel of the row to the first.
4696 for (
unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4698 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4700 TTarget*
const targetRowBegin = target + (height - rowIndex - 1u) * targetStrideElements;
// Points to the last pixel of the target row.
4701 TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4703 for (
unsigned int x = 0u; x < width; ++x)
4705 ocean_assert(targetPixel >= targetRowBegin);
4706 tPixelFunction(sourcePixel, targetPixel);
4708 sourcePixel += tSourceChannels;
4709 targetPixel -= tTargetChannels;
// Applies tOperator to every pair of corresponding pixels of two source frames in the rows
// [firstRow, firstRow + numberRows) and writes the result to the target frame.
// Both source frames have tSourceChannels channels; all three frames may have individual row padding.
// NOTE(review): the case labels of the switch are not visible in this excerpt; the four sections below
// are described by what their pointer arithmetic does.
4720template <
typename TSource0,
typename TSource1,
typename TTarget,
typename TIntermediate,
unsigned int tSourceChannels,
unsigned int tTargetChannels,
void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
4721void FrameChannels::applyBivariateOperatorSubset(
const TSource0* source0,
const TSource1* source1, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int source0PaddingElements,
const unsigned int source1PaddingElements,
const unsigned int targetPaddingElements,
const ConversionFlag conversionFlag,
const unsigned int firstRow,
const unsigned int numberRows)
4723 static_assert(tSourceChannels >= 1u,
"Invalid source channel number");
4724 static_assert(tTargetChannels >= 1u,
"Invalid target channel number");
4725 static_assert(tOperator,
"Invalid operator function");
4727 ocean_assert(source0 !=
nullptr && source1 !=
nullptr && target !=
nullptr);
// In-place operation is not supported for either source.
4728 ocean_assert((
const void*)(source0) != (
const void*)(target));
4729 ocean_assert((
const void*)(source1) != (
const void*)(target));
4731 ocean_assert(numberRows != 0u);
4732 ocean_assert(firstRow + numberRows <= height);
// Stride = payload elements of one row plus padding elements.
4734 const unsigned int source0StrideElements = width * tSourceChannels + source0PaddingElements;
4735 const unsigned int source1StrideElements = width * tSourceChannels + source1PaddingElements;
4737 const unsigned int targetWidthElements = width * tTargetChannels;
4739 const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4741 switch (conversionFlag)
// Section 1: same row, same pixel order.
4745 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4747 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4748 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4750 TTarget* rowTarget = target + rowIndex * targetStrideElements;
4751 const TTarget*
const rowTargetEnd = rowTarget + targetWidthElements;
4753 while (rowTarget != rowTargetEnd)
4755 ocean_assert(rowTarget < rowTargetEnd);
4757 tOperator(rowSource0, rowSource1, rowTarget);
4759 rowSource0 += tSourceChannels;
4760 rowSource1 += tSourceChannels;
4762 rowTarget += tTargetChannels;
// Section 2: source row rowIndex is written to target row (height - rowIndex - 1), same pixel order.
4771 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4773 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4774 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4776 TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements;
4777 const TTarget*
const rowTargetEnd = rowTarget + targetWidthElements;
4779 while (rowTarget != rowTargetEnd)
4781 ocean_assert(rowTarget < rowTargetEnd);
4783 tOperator(rowSource0, rowSource1, rowTarget);
4785 rowSource0 += tSourceChannels;
4786 rowSource1 += tSourceChannels;
4788 rowTarget += tTargetChannels;
// Section 3: same row, but the target pixels are written from the last pixel of the row to the first.
// rowTarget starts at the last pixel; rowTargetEnd is one pixel in front of the row's first pixel.
4797 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4799 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4800 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4802 TTarget* rowTarget = target + rowIndex * targetStrideElements + targetWidthElements - tTargetChannels;
4803 const TTarget*
const rowTargetEnd = rowTarget - targetWidthElements;
4805 while (rowTarget != rowTargetEnd)
4807 ocean_assert(rowTarget > rowTargetEnd);
4809 tOperator(rowSource0, rowSource1, rowTarget);
4811 rowSource0 += tSourceChannels;
4812 rowSource1 += tSourceChannels;
4814 rowTarget -= tTargetChannels;
// Section 4: source row rowIndex is written to target row (height - rowIndex - 1), and the target
// pixels are written from the last pixel of the row to the first.
4823 for (
unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4825 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4826 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4828 TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements + targetWidthElements - tTargetChannels;
4829 const TTarget*
const rowTargetEnd = rowTarget - targetWidthElements;
4831 while (rowTarget != rowTargetEnd)
4833 ocean_assert(rowTarget > rowTargetEnd);
4835 tOperator(rowSource0, rowSource1, rowTarget);
4837 rowSource0 += tSourceChannels;
4838 rowSource1 += tSourceChannels;
4840 rowTarget -= tTargetChannels;
// Reached for a conversion flag none of the sections above handles; fires in builds with asserts enabled.
4848 ocean_assert(
false &&
"This should never happen!");
4853template <
typename TSource,
typename TTarget,
unsigned int tSourceChannels,
unsigned int tTargetChannels>
4854void FrameChannels::applyRowOperatorSubset(
const TSource* source, TTarget* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourceStrideElements,
const unsigned int targetStrideElements,
const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction,
const unsigned int firstRow,
const unsigned int numberRows)
4856 static_assert(tSourceChannels >= 1u,
"Invalid source channel number");
4857 static_assert(tTargetChannels >= 1u,
"Invalid target channel number");
4859 ocean_assert(source !=
nullptr && target !=
nullptr);
4860 ocean_assert((
const void*)source != (
const void*)target);
4862 ocean_assert(width * tSourceChannels <= sourceStrideElements);
4863 ocean_assert(width * tTargetChannels <= targetStrideElements);
4865 ocean_assert(rowOperatorFunction !=
nullptr);
4867 ocean_assert(numberRows != 0u);
4868 ocean_assert(firstRow + numberRows <= height);
4870 for (
unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4872 rowOperatorFunction(source + y * sourceStrideElements, target + y * targetStrideElements, width, height, y, sourceStrideElements, targetStrideElements);
// Row converter: reduces interleaved 3-channel 8-bit pixels to one 8-bit value per pixel using three
// integer weights with 7 bit precision (the weights sum up to 128, see the assert below).
// The function signature is not part of this excerpt; the body reads `source`, `target`, `size`
// and `channelMultiplicationFactors_128`.
// Each template flag states whether the weight of the corresponding channel is non-zero (asserted below).
4876template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
4879 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2,
"Invalid channel factors!");
4881 ocean_assert(channelMultiplicationFactors_128 !=
nullptr);
// the factors arrive through an untyped pointer and are read as three consecutive unsigned int values
4882 const unsigned int* channelFactors_128 =
reinterpret_cast<const unsigned int*
>(channelMultiplicationFactors_128);
4883 ocean_assert(channelFactors_128 !=
nullptr);
4885 const unsigned int factorChannel0_128 = channelFactors_128[0];
4886 const unsigned int factorChannel1_128 = channelFactors_128[1];
4887 const unsigned int factorChannel2_128 = channelFactors_128[2];
// each weight is at most 128 and all weights together are exactly 128 (= 1.0 in 7 bit fixed point)
4889 ocean_assert(factorChannel0_128 <= 128u && factorChannel1_128 <= 128u && factorChannel2_128 <= 128u);
4890 ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 == 128u);
// the compile-time flags must agree with the run-time weights
4892 ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
4893 ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
4894 ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
4896 ocean_assert(source !=
nullptr && target !=
nullptr && size >= 1);
// `size` counts target elements, i.e., one per pixel
4898 const uint8_t*
const targetEnd = target + size;
4900#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// SSE path: whole blocks of 16 pixels; the remaining pixels fall through to the scalar loop at the end
4902 constexpr size_t blockSize = 16;
4903 const size_t blocks = size / blockSize;
// every weight is broadcast to all eight 16-bit lanes
4905 const __m128i multiplicationFactors0_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel0_128));
4906 const __m128i multiplicationFactors1_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel1_128));
4907 const __m128i multiplicationFactors2_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel2_128));
// NOTE(review): the per-block conversion call is not part of this excerpt; only the pointer advances are visible
4909 for (
size_t n = 0; n < blocks; ++n)
4913 source += blockSize *
size_t(3);
4914 target += blockSize;
4917#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON path: whole blocks of 8 pixels
4919 constexpr size_t blockSize = 8;
4920 const size_t blocks = size / blockSize;
// every weight is broadcast to all eight 8-bit lanes
4922 const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
4923 const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
4924 const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
4926 for (
size_t n = 0; n < blocks; ++n)
4928 convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8);
4930 source += blockSize *
size_t(3);
4931 target += blockSize;
// scalar loop for all pixels not handled by a SIMD block
4936 while (target != targetEnd)
4938 ocean_assert(target < targetEnd);
// channels whose weight is known to be zero are skipped at compile time
4940 const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
4941 const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
4942 const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
// adding 64 (half of 128) before the shift by 7 rounds to the nearest integer
// NOTE(review): the advance of `source` to the next pixel is not part of this excerpt
4944 *target++ = (uint8_t)((channel0 + channel1 + channel2 + 64u) >> 7u);
// Row converter: reduces interleaved 4-channel 8-bit pixels to one 8-bit value per pixel using four
// integer weights with 7 bit precision (the weights sum up to 128, see the assert below).
// The function signature is not part of this excerpt; the body reads `source`, `target`, `size`
// and `channelMultiplicationFactors_128`.
// Each template flag states whether the weight of the corresponding channel is non-zero (asserted below).
4949template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
4952 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3,
"Invalid channel factors!");
4954 ocean_assert(channelMultiplicationFactors_128 !=
nullptr);
// the factors arrive through an untyped pointer and are read as four consecutive unsigned int values
4955 const unsigned int* channelFactors_128 =
reinterpret_cast<const unsigned int*
>(channelMultiplicationFactors_128);
4956 ocean_assert(channelFactors_128 !=
nullptr);
4958 const unsigned int factorChannel0_128 = channelFactors_128[0];
4959 const unsigned int factorChannel1_128 = channelFactors_128[1];
4960 const unsigned int factorChannel2_128 = channelFactors_128[2];
4961 const unsigned int factorChannel3_128 = channelFactors_128[3];
// each weight is at most 127 here (not 128 as in the 3-channel variant) and all weights together are exactly 128
// NOTE(review): presumably 127 because the SSE path packs each weight into one byte of a 32-bit value - confirm
4963 ocean_assert(factorChannel0_128 <= 127u && factorChannel1_128 <= 127u && factorChannel2_128 <= 127u && factorChannel3_128 <= 127u);
4964 ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 + factorChannel3_128 == 128u);
// the compile-time flags must agree with the run-time weights
4966 ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
4967 ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
4968 ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
4969 ocean_assert(tUseFactorChannel3 == (factorChannel3_128 != 0u));
4971 ocean_assert(source !=
nullptr && target !=
nullptr && size >= 1);
// `size` counts target elements, i.e., one per pixel
4973 const uint8_t*
const targetEnd = target + size;
4975#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// SSE path: whole blocks of 16 pixels; the remaining pixels fall through to the scalar loop at the end
4977 constexpr size_t blockSize = 16;
4978 const size_t blocks = size / blockSize;
// the four weights are packed as bytes (channel 0 in the lowest byte) and replicated to every 32-bit lane
4980 const __m128i m128_multiplicationFactors = _mm_set1_epi32(
int(factorChannel0_128 | (factorChannel1_128 << 8u) | (factorChannel2_128 << 16u) | (factorChannel3_128 << 24u)));
// NOTE(review): the per-block conversion call is not part of this excerpt; only the pointer advances are visible
4982 for (
size_t n = 0; n < blocks; ++n)
4986 source += blockSize *
size_t(4);
4987 target += blockSize;
4990#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON path: whole blocks of 8 pixels
4992 constexpr size_t blockSize = 8;
4993 const size_t blocks = size / blockSize;
// every weight is broadcast to all eight 8-bit lanes
4995 const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
4996 const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
4997 const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
4998 const uint8x8_t factorChannel3_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel3_128);
5000 for (
size_t n = 0; n < blocks; ++n)
5002 convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2, tUseFactorChannel3>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8, factorChannel3_128_u_8x8);
5004 source += blockSize *
size_t(4);
5005 target += blockSize;
// scalar loop for all pixels not handled by a SIMD block
5010 while (target != targetEnd)
5012 ocean_assert(target < targetEnd);
// channels whose weight is known to be zero are skipped at compile time
5014 const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
5015 const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
5016 const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
5017 const unsigned int channel3 = tUseFactorChannel3 ? (source[3] * factorChannel3_128) : 0u;
// adding 64 (half of 128) before the shift by 7 rounds to the nearest integer
// NOTE(review): the advance of `source` to the next pixel is not part of this excerpt
5019 *target++ = (uint8_t)((channel0 + channel1 + channel2 + channel3 + 64u) >> 7u);
// In-place conversion of 8-bit pixels from premultiplied alpha to straight alpha for a subset of rows.
// For each pixel with non-zero alpha every non-alpha channel c becomes min((c * 255 + alpha / 2) / alpha, 255);
// the alpha channel itself and pixels with zero alpha stay untouched.
// The function signature is not part of this excerpt; the body reads `frame`, `width`,
// `framePaddingElements`, `firstRow` and `numberRows`.
5024template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5027 static_assert(tChannels >= 2u,
"Invalid channel number!");
5028 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5030 ocean_assert(frame !=
nullptr);
5031 ocean_assert(width >= 1u);
// elements between the starts of two consecutive rows
5033 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
// jump to the first row of the subset
5035 uint8_t* frameRow = frame + frameStrideElements * firstRow;
5037 for (
unsigned int y = 0u; y < numberRows; ++y)
5039 for (
unsigned int x = 0u; x < width; ++x)
// a zero alpha value would be a division by zero, such pixels are skipped
5041 if (frameRow[tAlphaChannelIndex])
// half of the divisor, added to the numerator so that the division rounds to nearest
5043 const uint8_t alpha_2 = frameRow[tAlphaChannelIndex] / 2u;
5045 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5047 if (channelIndex != tAlphaChannelIndex)
// the quotient exceeds 255 whenever the channel value is larger than alpha, therefore the clamp
5049 frameRow[channelIndex] = uint8_t(std::min((frameRow[channelIndex] * 255u + alpha_2) / frameRow[tAlphaChannelIndex], 255u));
// next pixel
5054 frameRow += tChannels;
// skip the padding at the end of the row
5057 frameRow += framePaddingElements;
// Conversion of 8-bit pixels from premultiplied alpha to straight alpha for a subset of rows, reading from
// `source` and writing to `target`.
// For each pixel with non-zero alpha every non-alpha channel c is written as min((c * 255 + alpha / 2) / alpha, 255)
// and the alpha channel is copied; the visible lines further contain a plain per-channel copy of the pixel
// (presumably the branch for pixels with zero alpha, the `else` lines are not part of this excerpt).
// The function signature is not part of this excerpt; the body reads `source`, `target`, `width`,
// `sourcePaddingElements`, `targetPaddingElements`, `firstRow` and `numberRows`.
5061template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5064 static_assert(tChannels >= 2u,
"Invalid channel number!");
5065 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5067 ocean_assert(source !=
nullptr && target !=
nullptr);
5068 ocean_assert(width >= 1u);
// elements between the starts of two consecutive rows, individually for source and target
5070 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5071 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
// jump to the first row of the subset
5073 const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5074 uint8_t* targetRow = target + targetStrideElements * firstRow;
5076 for (
unsigned int y = 0u; y < numberRows; ++y)
5078 for (
unsigned int x = 0u; x < width; ++x)
// a zero alpha value would be a division by zero
5080 if (sourceRow[tAlphaChannelIndex])
// half of the divisor, added to the numerator so that the division rounds to nearest
5082 const uint8_t alpha_2 = sourceRow[tAlphaChannelIndex] / 2u;
5084 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5086 if (channelIndex != tAlphaChannelIndex)
// the quotient exceeds 255 whenever the channel value is larger than alpha; std::min clamps it to the
// valid 8-bit range before the narrowing cast (an upper clamp, matching the in-place variant)
5088 targetRow[channelIndex] = uint8_t(std::min((sourceRow[channelIndex] * 255u + alpha_2) / sourceRow[tAlphaChannelIndex], 255u));
// the alpha channel is copied unchanged
5092 targetRow[channelIndex] = sourceRow[channelIndex];
// plain copy of all channels of the pixel
5098 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5100 targetRow[channelIndex] = sourceRow[channelIndex];
// next pixel
5104 sourceRow += tChannels;
5105 targetRow += tChannels;
// skip the padding at the end of each row
5108 sourceRow += sourcePaddingElements;
5109 targetRow += targetPaddingElements;
// In-place conversion of 8-bit pixels from straight alpha to premultiplied alpha for a subset of rows.
// Every non-alpha channel c becomes (c * alpha + 127) / 255; the alpha channel stays untouched.
// The function signature is not part of this excerpt; the body reads `frame`, `width`,
// `framePaddingElements`, `firstRow` and `numberRows`.
5113template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5116 static_assert(tChannels >= 2u,
"Invalid channel number!");
5117 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5119 ocean_assert(frame !=
nullptr);
5120 ocean_assert(width >= 1u);
// elements between the starts of two consecutive rows
5122 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
// jump to the first row of the subset
5124 uint8_t* frameRow = frame + frameStrideElements * firstRow;
5126 for (
unsigned int y = 0u; y < numberRows; ++y)
5128 for (
unsigned int x = 0u; x < width; ++x)
5130 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5132 if (channelIndex != tAlphaChannelIndex)
// + 127 rounds the division by 255 to the nearest integer; the result never exceeds 255
5134 frameRow[channelIndex] = (frameRow[channelIndex] * frameRow[tAlphaChannelIndex] + 127u) / 255u;
// next pixel
5138 frameRow += tChannels;
// skip the padding at the end of the row
5141 frameRow += framePaddingElements;
// Conversion of 8-bit pixels from straight alpha to premultiplied alpha for a subset of rows, reading from
// `source` and writing to `target`.
// Every non-alpha channel c is written as (c * alpha + 127) / 255, the alpha channel is copied
// (the `else` line separating both assignments is not part of this excerpt).
// The function signature is not part of this excerpt; the body reads `source`, `target`, `width`,
// `sourcePaddingElements`, `targetPaddingElements`, `firstRow` and `numberRows`.
5145template <
unsigned int tChannels,
unsigned int tAlphaChannelIndex>
5148 static_assert(tChannels >= 2u,
"Invalid channel number!");
5149 static_assert(tAlphaChannelIndex < tChannels,
"Invalid alpha channel index!");
5151 ocean_assert(source !=
nullptr && target !=
nullptr);
5152 ocean_assert(width >= 1u);
// elements between the starts of two consecutive rows, individually for source and target
5154 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5155 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
// jump to the first row of the subset
5157 const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5158 uint8_t* targetRow = target + targetStrideElements * firstRow;
5160 for (
unsigned int y = 0u; y < numberRows; ++y)
5162 for (
unsigned int x = 0u; x < width; ++x)
5164 for (
unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5166 if (channelIndex != tAlphaChannelIndex)
// + 127 rounds the division by 255 to the nearest integer; the result never exceeds 255
5168 targetRow[channelIndex] = (sourceRow[channelIndex] * sourceRow[tAlphaChannelIndex] + 127u) / 255u;
// the alpha channel is copied unchanged
5172 targetRow[channelIndex] = sourceRow[channelIndex];
// next pixel
5176 sourceRow += tChannels;
5177 targetRow += tChannels;
// skip the padding at the end of each row
5180 sourceRow += sourcePaddingElements;
5181 targetRow += targetPaddingElements;
5185#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// Body of an SSE routine that reads 48 bytes from `source` (three unaligned 16-byte loads, i.e., 16 pixels
// with 3 interleaved 8-bit channels) and writes 16 bytes to `target` (one value per pixel).
// The function signature, the de-interleaving of the loaded data into channel0/1/2 and the declarations of
// the channel*_low_* registers are not part of this excerpt.
5189 ocean_assert(source !=
nullptr && target !=
nullptr);
// 64 in every 16-bit lane: the rounding offset for the shift by 7 below
5206 const __m128i constant64_u_16x8 = _mm_set1_epi32(0x00400040);
5208 const __m128i sourceA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5209 const __m128i sourceB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5210 const __m128i sourceC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
// one register per channel, filled by code that is not part of this excerpt
5212 __m128i channel0_u_8x16;
5213 __m128i channel1_u_8x16;
5214 __m128i channel2_u_8x16;
// shifting each 16-bit lane right by 8 isolates the upper byte of the lane (the odd-indexed pixels)
5223 const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5224 const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5225 const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
// 16-bit multiplication of every channel value with the weight of its channel
5229 const __m128i result0_low_u_8x16 = _mm_mullo_epi16(channel0_low_u_8x16, multiplicationFactors0_128_u_16x8);
5230 const __m128i result0_high_u_8x16 = _mm_mullo_epi16(channel0_high_u_8x16, multiplicationFactors0_128_u_16x8);
5232 const __m128i result1_low_u_8x16 = _mm_mullo_epi16(channel1_low_u_8x16, multiplicationFactors1_128_u_16x8);
5233 const __m128i result1_high_u_8x16 = _mm_mullo_epi16(channel1_high_u_8x16, multiplicationFactors1_128_u_16x8);
5235 const __m128i result2_low_u_8x16 = _mm_mullo_epi16(channel2_low_u_8x16, multiplicationFactors2_128_u_16x8);
5236 const __m128i result2_high_u_8x16 = _mm_mullo_epi16(channel2_high_u_8x16, multiplicationFactors2_128_u_16x8);
// saturating unsigned sum of the three weighted channels plus the rounding offset
5239 const __m128i result128_low_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_low_u_8x16, result1_low_u_8x16), _mm_adds_epu16(result2_low_u_8x16, constant64_u_16x8));
5240 const __m128i result128_high_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_high_u_8x16, result1_high_u_8x16), _mm_adds_epu16(result2_high_u_8x16, constant64_u_16x8));
// division by 128
5243 const __m128i result_low_u_8x16 = _mm_srli_epi16(result128_low_u_8x16, 7);
5244 const __m128i result_high_u_8x16 = _mm_srli_epi16(result128_high_u_8x16, 7);
// re-interleave: low results stay in the lower byte of each lane, high results move to the upper byte
5247 const __m128i result_u_8x16 = _mm_or_si128(result_low_u_8x16, _mm_slli_epi16(result_high_u_8x16, 8));
// unaligned store of the 16 result bytes
5250 _mm_storeu_si128((__m128i*)target, result_u_8x16);
// SSE routine that reads 48 bytes from `source` (16 pixels with 3 interleaved 8-bit channels), applies a
// 3x3 set of 16-bit factors and writes 48 bytes to `target`.
// Factor naming as used below: factorChannel<T><S> is multiplied with source channel S and contributes to
// target channel T (e.g., result0 = channel0 * f00 + channel1 * f01 + channel2 * f02).
// NOTE(review): the de-interleaving of the input, the declarations of the channel*_low_* registers, the use
// of the bias parameters, the normalization of the 7 bit precision and the interleaving into
// resultA/B/C are not part of this excerpt.
5253OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_128_s_16x8,
const __m128i& factorChannel10_128_s_16x8,
const __m128i& factorChannel20_128_s_16x8,
const __m128i& factorChannel01_128_s_16x8,
const __m128i& factorChannel11_128_s_16x8,
const __m128i& factorChannel21_128_s_16x8,
const __m128i& factorChannel02_128_s_16x8,
const __m128i& factorChannel12_128_s_16x8,
const __m128i& factorChannel22_128_s_16x8,
const __m128i& biasChannel0_s_16x8,
const __m128i& biasChannel1_s_16x8,
const __m128i& biasChannel2_s_16x8)
5255 ocean_assert(source !=
nullptr && target !=
nullptr);
// three unaligned 16-byte loads covering the 16 input pixels
5274 const __m128i sourceA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5275 const __m128i sourceB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5276 const __m128i sourceC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
// one register per channel, filled by code that is not part of this excerpt
5278 __m128i channel0_u_8x16;
5279 __m128i channel1_u_8x16;
5280 __m128i channel2_u_8x16;
// shifting each 16-bit lane right by 8 isolates the upper byte of the lane (the odd-indexed pixels)
5289 const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5290 const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5291 const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
// weighted sums for the even-indexed pixels (low bytes), one line per target channel
5295 __m128i result0_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel02_128_s_16x8));
5296 __m128i result1_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel12_128_s_16x8));
5297 __m128i result2_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel22_128_s_16x8));
// weighted sums for the odd-indexed pixels (high bytes), one line per target channel
5299 __m128i result0_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel02_128_s_16x8));
5300 __m128i result1_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel12_128_s_16x8));
5301 __m128i result2_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel22_128_s_16x8));
// clamp every signed 16-bit result to the range [0, 255]
5315 const __m128i constant255_s_16x8 = _mm_set1_epi16(255);
5317 result0_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5318 result1_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5319 result2_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5321 result0_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5322 result1_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5323 result2_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
// merge: low results stay in the lower byte of each lane, high results move to the upper byte
5326 const __m128i result0_u_8x16 = _mm_or_si128(result0_low_u_8x16, _mm_slli_epi16(result0_high_u_8x16, 8));
5327 const __m128i result1_u_8x16 = _mm_or_si128(result1_low_u_8x16, _mm_slli_epi16(result1_high_u_8x16, 8));
5328 const __m128i result2_u_8x16 = _mm_or_si128(result2_low_u_8x16, _mm_slli_epi16(result2_high_u_8x16, 8));
// output registers, filled by code that is not part of this excerpt
5330 __m128i resultA_u_8x16;
5331 __m128i resultB_u_8x16;
5332 __m128i resultC_u_8x16;
// three unaligned 16-byte stores covering the 16 output pixels
5336 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5337 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5338 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
// SSE routine that reads 48 bytes from `source` (16 pixels with 3 interleaved 8-bit channels) and writes
// 48 bytes to `target`; factors are passed as 16-bit lanes, biases as 32-bit lanes.
// NOTE(review): the `_1024_` infix and the function name suggest factors scaled by 1024 (10 bit precision) - confirm.
// NOTE(review): the de-interleaving of the input, the computation of the 32-bit result registers, the use of
// the factor and bias parameters and the interleaving into resultA/B/C are not part of this excerpt.
5341OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(
const uint8_t*
const source, uint8_t*
const target,
const __m128i& factorChannel00_1024_s_16x8,
const __m128i& factorChannel10_1024_s_16x8,
const __m128i& factorChannel20_1024_s_16x8,
const __m128i& factorChannel01_1024_s_16x8,
const __m128i& factorChannel11_1024_s_16x8,
const __m128i& factorChannel21_1024_s_16x8,
const __m128i& factorChannel02_1024_s_16x8,
const __m128i& factorChannel12_1024_s_16x8,
const __m128i& factorChannel22_1024_s_16x8,
const __m128i& biasChannel0_1024_s_32x4,
const __m128i& biasChannel1_1024_s_32x4,
const __m128i& biasChannel2_1024_s_32x4)
5343 ocean_assert(source !=
nullptr && target !=
nullptr);
// three unaligned 16-byte loads covering the 16 input pixels
5363 const __m128i sourceA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5364 const __m128i sourceB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5365 const __m128i sourceC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
// one register per channel, filled by code that is not part of this excerpt
5367 __m128i channel0_u_8x16;
5368 __m128i channel1_u_8x16;
5369 __m128i channel2_u_8x16;
// shifting each 16-bit lane right by 8 isolates the upper byte of the lane (the odd-indexed pixels)
5379 const __m128i channel0_high_u_16x8 = _mm_srli_epi16(channel0_u_8x16, 8);
5380 const __m128i channel1_high_u_16x8 = _mm_srli_epi16(channel1_u_8x16, 8);
5381 const __m128i channel2_high_u_16x8 = _mm_srli_epi16(channel2_u_8x16, 8);
// 32-bit intermediate results for target channel 0 (four registers: low/high pixels, halves A/B)
5386 __m128i result0_low_A_s_32x4;
5387 __m128i result0_low_B_s_32x4;
5388 __m128i result0_high_A_s_32x4;
5389 __m128i result0_high_B_s_32x4;
// 32-bit intermediate results for target channel 1
5406 __m128i result1_low_A_s_32x4;
5407 __m128i result1_low_B_s_32x4;
5408 __m128i result1_high_A_s_32x4;
5409 __m128i result1_high_B_s_32x4;
// 32-bit intermediate results for target channel 2
5426 __m128i result2_low_A_s_32x4;
5427 __m128i result2_low_B_s_32x4;
5428 __m128i result2_high_A_s_32x4;
5429 __m128i result2_high_B_s_32x4;
// merge pairs of 32-bit registers into 16-bit lanes: the low result keeps its lower 16 bits,
// the high result is shifted into the upper 16 bits of the same 32-bit lane
5449 const __m128i mask_0000FFFF_32x4 = _mm_set1_epi32(0x0000FFFF);
5451 __m128i result0_A_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_A_s_32x4, 16));
5452 __m128i result0_B_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_B_s_32x4, 16));
5454 __m128i result1_A_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_A_s_32x4, 16));
5455 __m128i result1_B_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_B_s_32x4, 16));
5457 __m128i result2_A_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_A_s_32x4, 16));
5458 __m128i result2_B_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_B_s_32x4, 16));
// pack signed 16-bit lanes to unsigned 8-bit with saturation, i.e., values are clamped to [0, 255]
5463 const __m128i result0_u_8x16 = _mm_packus_epi16(result0_A_s_16x8, result0_B_s_16x8);
5464 const __m128i result1_u_8x16 = _mm_packus_epi16(result1_A_s_16x8, result1_B_s_16x8);
5465 const __m128i result2_u_8x16 = _mm_packus_epi16(result2_A_s_16x8, result2_B_s_16x8);
// output registers, filled by code that is not part of this excerpt
5467 __m128i resultA_u_8x16;
5468 __m128i resultB_u_8x16;
5469 __m128i resultC_u_8x16;
// three unaligned 16-byte stores covering the 16 output pixels
5473 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5474 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5475 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
// Body of an SSE routine that reads 64 bytes from `source` (four unaligned 16-byte loads, i.e., 16 pixels
// with 4 interleaved 8-bit channels) and writes 16 bytes to `target` (one value per pixel).
// The function signature is not part of this excerpt; `multiplicationFactors0123_128_s_32x4` is defined outside the visible lines.
5480 ocean_assert(source !=
nullptr && target !=
nullptr);
// 64 in every 16-bit lane: the rounding offset for the shift by 7 below
5503 const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
5505 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5506 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5507 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5508 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 3);
// multiplies the unsigned pixel bytes with the signed factor bytes and adds adjacent products,
// which yields two 16-bit partial sums per 4-byte pixel
5513 const __m128i intermediateResults0_u_16x8 = _mm_maddubs_epi16(pixelsA_u_8x16, multiplicationFactors0123_128_s_32x4);
5514 const __m128i intermediateResults1_u_16x8 = _mm_maddubs_epi16(pixelsB_u_8x16, multiplicationFactors0123_128_s_32x4);
5515 const __m128i intermediateResults2_u_16x8 = _mm_maddubs_epi16(pixelsC_u_8x16, multiplicationFactors0123_128_s_32x4);
5516 const __m128i intermediateResults3_u_16x8 = _mm_maddubs_epi16(pixelsD_u_8x16, multiplicationFactors0123_128_s_32x4);
// horizontal add joins the two partial sums of each pixel: 8 pixel sums per register
5519 __m128i grayA_u_16x8 = _mm_hadd_epi16(intermediateResults0_u_16x8, intermediateResults1_u_16x8);
5520 __m128i grayB_u_16x8 = _mm_hadd_epi16(intermediateResults2_u_16x8, intermediateResults3_u_16x8);
// rounding offset
5523 grayA_u_16x8 = _mm_add_epi16(grayA_u_16x8, constant64_u_8x16);
5524 grayB_u_16x8 = _mm_add_epi16(grayB_u_16x8, constant64_u_8x16);
// division by 128
5527 grayA_u_16x8 = _mm_srli_epi16(grayA_u_16x8, 7);
5528 grayB_u_16x8 = _mm_srli_epi16(grayB_u_16x8, 7);
// pack both registers to 16 unsigned bytes (with saturation)
5535 const __m128i gray_u_8x16 = _mm_packus_epi16(grayA_u_16x8, grayB_u_16x8);
// unaligned store of the 16 result bytes
5538 _mm_storeu_si128((__m128i*)target, gray_u_8x16);
// Body of an SSE routine that reads 64 bytes from `source` (four unaligned 16-byte loads, i.e., 16 pixels
// with 4 interleaved 8-bit channels) and writes 32 bytes to `target` (two values per pixel).
// The function signature is not part of this excerpt; both `multiplicationFactorsChannel*_0123_128_s_16x8`
// registers are defined outside the visible lines.
5543 ocean_assert(source !=
nullptr && target !=
nullptr);
// 64 in every 16-bit lane: the rounding offset for the shift by 7 below
5564 const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
5566 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 0);
5567 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 1);
5568 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 2);
5569 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((
const __m128i*)source + 3);
// zero-extend all bytes to 16 bit; every resulting register holds two pixels (8 channel values)
5573 const __m128i pixelsA_u_16x8 = _mm_unpacklo_epi8(pixelsA_u_8x16, _mm_setzero_si128());
5574 const __m128i pixelsB_u_16x8 = _mm_unpackhi_epi8(pixelsA_u_8x16, _mm_setzero_si128());
5576 const __m128i pixelsC_u_16x8 = _mm_unpacklo_epi8(pixelsB_u_8x16, _mm_setzero_si128());
5577 const __m128i pixelsD_u_16x8 = _mm_unpackhi_epi8(pixelsB_u_8x16, _mm_setzero_si128());
5579 const __m128i pixelsE_u_16x8 = _mm_unpacklo_epi8(pixelsC_u_8x16, _mm_setzero_si128());
5580 const __m128i pixelsF_u_16x8 = _mm_unpackhi_epi8(pixelsC_u_8x16, _mm_setzero_si128());
5582 const __m128i pixelsG_u_16x8 = _mm_unpacklo_epi8(pixelsD_u_8x16, _mm_setzero_si128());
5583 const __m128i pixelsH_u_16x8 = _mm_unpackhi_epi8(pixelsD_u_8x16, _mm_setzero_si128());
// target channel 0: multiply 16-bit lanes with the factors and add adjacent products into 32-bit lanes
// (two partial sums per pixel)
5589 const __m128i intermediateResultsChannel0_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5590 const __m128i intermediateResultsChannel0_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5591 const __m128i intermediateResultsChannel0_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5592 const __m128i intermediateResultsChannel0_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5593 const __m128i intermediateResultsChannel0_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5594 const __m128i intermediateResultsChannel0_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5595 const __m128i intermediateResultsChannel0_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5596 const __m128i intermediateResultsChannel0_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
// horizontal add joins the two partial sums of each pixel: 4 pixel sums per register
5598 const __m128i resultsChannel0_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_0_u_32x4, intermediateResultsChannel0_1_u_32x4);
5599 const __m128i resultsChannel0_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_2_u_32x4, intermediateResultsChannel0_3_u_32x4);
5600 const __m128i resultsChannel0_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_4_u_32x4, intermediateResultsChannel0_5_u_32x4);
5601 const __m128i resultsChannel0_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_6_u_32x4, intermediateResultsChannel0_7_u_32x4);
// target channel 1: same scheme with the second set of factors
5604 const __m128i intermediateResultsChannel1_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5605 const __m128i intermediateResultsChannel1_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5606 const __m128i intermediateResultsChannel1_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5607 const __m128i intermediateResultsChannel1_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5608 const __m128i intermediateResultsChannel1_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5609 const __m128i intermediateResultsChannel1_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5610 const __m128i intermediateResultsChannel1_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5611 const __m128i intermediateResultsChannel1_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5613 const __m128i resultsChannel1_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_0_u_32x4, intermediateResultsChannel1_1_u_32x4);
5614 const __m128i resultsChannel1_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_2_u_32x4, intermediateResultsChannel1_3_u_32x4);
5615 const __m128i resultsChannel1_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_4_u_32x4, intermediateResultsChannel1_5_u_32x4);
5616 const __m128i resultsChannel1_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_6_u_32x4, intermediateResultsChannel1_7_u_32x4);
// interleave both target channels: channel 0 in the lower and channel 1 in the upper 16 bits of each 32-bit lane
// NOTE(review): this relies on the channel 0 sums fitting into 16 bits without sign bits in the upper half - confirm against the factor constraints
5620 __m128i resultA_u_16x8 = _mm_or_si128(resultsChannel0_A_u_32x4, _mm_slli_epi32(resultsChannel1_A_u_32x4, 16));
5621 __m128i resultB_u_16x8 = _mm_or_si128(resultsChannel0_B_u_32x4, _mm_slli_epi32(resultsChannel1_B_u_32x4, 16));
5622 __m128i resultC_u_16x8 = _mm_or_si128(resultsChannel0_C_u_32x4, _mm_slli_epi32(resultsChannel1_C_u_32x4, 16));
5623 __m128i resultD_u_16x8 = _mm_or_si128(resultsChannel0_D_u_32x4, _mm_slli_epi32(resultsChannel1_D_u_32x4, 16));
// rounding offset
5626 resultA_u_16x8 = _mm_add_epi16(resultA_u_16x8, constant64_u_8x16);
5627 resultB_u_16x8 = _mm_add_epi16(resultB_u_16x8, constant64_u_8x16);
5628 resultC_u_16x8 = _mm_add_epi16(resultC_u_16x8, constant64_u_8x16);
5629 resultD_u_16x8 = _mm_add_epi16(resultD_u_16x8, constant64_u_8x16);
// division by 128
5632 resultA_u_16x8 = _mm_srli_epi16(resultA_u_16x8, 7);
5633 resultB_u_16x8 = _mm_srli_epi16(resultB_u_16x8, 7);
5634 resultC_u_16x8 = _mm_srli_epi16(resultC_u_16x8, 7);
5635 resultD_u_16x8 = _mm_srli_epi16(resultD_u_16x8, 7);
// pack to unsigned bytes (with saturation), 8 two-channel pixels per register
5642 const __m128i resultAB_u_8x16 = _mm_packus_epi16(resultA_u_16x8, resultB_u_16x8);
5643 const __m128i resultCD_u_8x16 = _mm_packus_epi16(resultC_u_16x8, resultD_u_16x8);
// two unaligned 16-byte stores covering the 16 output pixels
5646 _mm_storeu_si128((__m128i*)target + 0, resultAB_u_8x16);
5647 _mm_storeu_si128((__m128i*)target + 1, resultCD_u_8x16);
5652#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON routine converting 8 pixels with 3 interleaved 8-bit channels to 8 single-channel 8-bit values
// using per-channel weights with 7 bit precision.
// The function signature is not part of this excerpt; the body reads `source`, `target` and the three
// `factorChannel*_128_u_8x8` registers.
// Each template flag enables the contribution of the corresponding channel at compile time.
5654template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2>
5657 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2,
"Invalid multiplication factors!");
5659 ocean_assert(source !=
nullptr && target !=
nullptr);
// de-interleaving load: val[0], val[1], val[2] hold the 8 values of channel 0, 1, 2
5678 uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5680 uint16x8_t intermediateResults_u_16x8;
// channel 0 initializes the accumulator with a widening 8-bit multiplication
5684 if constexpr (tUseFactorChannel0)
5686 intermediateResults_u_16x8 = vmull_u8(source_u_8x8x3.val[0], factorChannel0_128_u_8x8);
// zero initialization (presumably the `else` branch of the check above, the `else` line is not part of this excerpt)
5690 intermediateResults_u_16x8 = vdupq_n_u16(0u);
// channels 1 and 2 are added with widening multiply-accumulate
5695 if constexpr (tUseFactorChannel1)
5697 intermediateResults_u_16x8 = vmlal_u8(intermediateResults_u_16x8, source_u_8x8x3.val[1], factorChannel1_128_u_8x8);
5702 if constexpr (tUseFactorChannel2)
5704 intermediateResults_u_16x8 = vmlal_u8(intermediateResults_u_16x8, source_u_8x8x3.val[2], factorChannel2_128_u_8x8);
// rounding right shift by 7 (division by 128) with saturating narrowing to 8 bit
5708 uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_u_16x8, 7);
// store of the 8 result bytes
5711 vst1_u8(target, results_u_8x8);
// NEON routine converting 8 pixels with 3 interleaved 8-bit channels to 8 pixels with 3 interleaved 8-bit
// channels: the per-channel bias is subtracted from the source values, a 3x3 set of signed 16-bit factors
// is applied and the result is shifted right by 6 bits.
// Factor naming as used below: factorChannel<T><S> is multiplied with source channel S and contributes to
// target channel T (e.g., result0 = source0 * f00 + source1 * f01 + source2 * f02).
5714OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8)
5716 ocean_assert(source !=
nullptr && target !=
nullptr);
// de-interleaving load: val[0], val[1], val[2] hold the 8 values of channel 0, 1, 2
5736 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
// widening subtraction of the bias; the reinterpretation as signed 16 bit turns wrapped
// differences into the intended negative values
5739 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[0], biasChannel0_u_8x8));
5740 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[1], biasChannel1_u_8x8));
5741 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[2], biasChannel2_u_8x8));
// contribution of source channel 0 to the three target channels
5745 int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_64_s_16x8);
5746 int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_64_s_16x8);
5747 int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_64_s_16x8);
// contribution of source channel 1, accumulated with saturating addition
5749 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, vmulq_s16(source1_s_16x8, factorChannel01_64_s_16x8));
5750 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, vmulq_s16(source1_s_16x8, factorChannel11_64_s_16x8));
5751 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, vmulq_s16(source1_s_16x8, factorChannel21_64_s_16x8));
// contribution of source channel 2, accumulated with saturating addition
5753 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, vmulq_s16(source2_s_16x8, factorChannel02_64_s_16x8));
5754 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, vmulq_s16(source2_s_16x8, factorChannel12_64_s_16x8));
5755 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, vmulq_s16(source2_s_16x8, factorChannel22_64_s_16x8));
5757 uint8x8x3_t results_u_8x8x3;
// rounding right shift by 6 (division by 64) with saturating narrowing from signed 16 bit to unsigned 8 bit
5760 results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 6);
5761 results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 6);
5762 results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 6);
// interleaving store of the 8 result pixels
5765 vst3_u8(target, results_u_8x8x3);
5768OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8)
5770 ocean_assert(source !=
nullptr && target !=
nullptr);
5785 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
5788 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
5789 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
5790 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
5792 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
5793 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
5794 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
5798 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
5799 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
5800 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
5802 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
5803 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
5804 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
5806 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8));
5807 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
5808 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
5810 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
5811 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
5812 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
5814 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
5815 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
5816 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
5818 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
5819 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
5820 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
5822 uint8x16x3_t results_u_8x16x3;
5825 results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
5826 results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
5827 results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
5830 vst3q_u8(target, results_u_8x16x3);
5833OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8)
5835 ocean_assert(source !=
nullptr && target !=
nullptr);
5855 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5857 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
5858 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
5859 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
5861 int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_128_s_16x8);
5862 int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_128_s_16x8);
5863 int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_128_s_16x8);
5865 intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source1_s_16x8, factorChannel01_128_s_16x8);
5866 intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source1_s_16x8, factorChannel11_128_s_16x8);
5867 intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source1_s_16x8, factorChannel21_128_s_16x8);
5869 intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source2_s_16x8, factorChannel02_128_s_16x8);
5870 intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source2_s_16x8, factorChannel12_128_s_16x8);
5871 intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source2_s_16x8, factorChannel22_128_s_16x8);
5875 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, biasChannel0_128_s_16x8);
5876 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, biasChannel1_128_s_16x8);
5877 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, biasChannel2_128_s_16x8);
5879 uint8x8x3_t results_u_8x8x3;
5882 results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 7);
5883 results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 7);
5884 results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 7);
5887 vst3_u8(target, results_u_8x8x3);
5890OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4)
5892 ocean_assert(source !=
nullptr && target !=
nullptr);
5913 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5915 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
5916 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
5917 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
5919 const int16x4_t source0_low_s_16x4 = vget_low_s16(source0_s_16x8);
5920 const int16x4_t source0_high_s_16x4 = vget_high_s16(source0_s_16x8);
5922 int32x4_t intermediateResults0_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel00_1024_s_16x4);
5923 int32x4_t intermediateResults0_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel00_1024_s_16x4);
5925 int32x4_t intermediateResults1_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel10_1024_s_16x4);
5926 int32x4_t intermediateResults1_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel10_1024_s_16x4);
5928 int32x4_t intermediateResults2_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel20_1024_s_16x4);
5929 int32x4_t intermediateResults2_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel20_1024_s_16x4);
5932 const int16x4_t source1_low_s_16x4 = vget_low_s16(source1_s_16x8);
5933 const int16x4_t source1_high_s_16x4 = vget_high_s16(source1_s_16x8);
5935 intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source1_low_s_16x4, factorChannel01_1024_s_16x4);
5936 intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source1_high_s_16x4, factorChannel01_1024_s_16x4);
5938 intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source1_low_s_16x4, factorChannel11_1024_s_16x4);
5939 intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source1_high_s_16x4, factorChannel11_1024_s_16x4);
5941 intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source1_low_s_16x4, factorChannel21_1024_s_16x4);
5942 intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source1_high_s_16x4, factorChannel21_1024_s_16x4);
5945 const int16x4_t source2_low_s_16x4 = vget_low_s16(source2_s_16x8);
5946 const int16x4_t source2_high_s_16x4 = vget_high_s16(source2_s_16x8);
5948 intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source2_low_s_16x4, factorChannel02_1024_s_16x4);
5949 intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source2_high_s_16x4, factorChannel02_1024_s_16x4);
5951 intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source2_low_s_16x4, factorChannel12_1024_s_16x4);
5952 intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source2_high_s_16x4, factorChannel12_1024_s_16x4);
5954 intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source2_low_s_16x4, factorChannel22_1024_s_16x4);
5955 intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source2_high_s_16x4, factorChannel22_1024_s_16x4);
5960 intermediateResults0_low_s_32x4 = vaddq_s32(intermediateResults0_low_s_32x4, biasChannel0_1024_s_32x4);
5961 intermediateResults0_high_s_32x4 = vaddq_s32(intermediateResults0_high_s_32x4, biasChannel0_1024_s_32x4);
5963 intermediateResults1_low_s_32x4 = vaddq_s32(intermediateResults1_low_s_32x4, biasChannel1_1024_s_32x4);
5964 intermediateResults1_high_s_32x4 = vaddq_s32(intermediateResults1_high_s_32x4, biasChannel1_1024_s_32x4);
5966 intermediateResults2_low_s_32x4 = vaddq_s32(intermediateResults2_low_s_32x4, biasChannel2_1024_s_32x4);
5967 intermediateResults2_high_s_32x4 = vaddq_s32(intermediateResults2_high_s_32x4, biasChannel2_1024_s_32x4);
5970 uint8x8x3_t results_u_8x8x3;
5973 results_u_8x8x3.val[0] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_high_s_32x4, 10)));
5974 results_u_8x8x3.val[1] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_high_s_32x4, 10)));
5975 results_u_8x8x3.val[2] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_high_s_32x4, 10)));
5978 vst3_u8(target, results_u_8x8x3);
5981OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x4_t& factorChannel00_1024_s_16x4,
const int16x4_t& factorChannel10_1024_s_16x4,
const int16x4_t& factorChannel20_1024_s_16x4,
const int16x4_t& factorChannel01_1024_s_16x4,
const int16x4_t& factorChannel11_1024_s_16x4,
const int16x4_t& factorChannel21_1024_s_16x4,
const int16x4_t& factorChannel02_1024_s_16x4,
const int16x4_t& factorChannel12_1024_s_16x4,
const int16x4_t& factorChannel22_1024_s_16x4,
const int32x4_t& biasChannel0_1024_s_32x4,
const int32x4_t& biasChannel1_1024_s_32x4,
const int32x4_t& biasChannel2_1024_s_32x4)
5983 ocean_assert(source !=
nullptr && target !=
nullptr);
6004 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6006 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6007 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6008 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
6010 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6011 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6012 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
6014 const int16x4_t source0_A_s_16x4 = vget_low_s16(source0_low_s_16x8);
6015 const int16x4_t source0_B_s_16x4 = vget_high_s16(source0_low_s_16x8);
6016 const int16x4_t source0_C_s_16x4 = vget_low_s16(source0_high_s_16x8);
6017 const int16x4_t source0_D_s_16x4 = vget_high_s16(source0_high_s_16x8);
6019 int32x4_t intermediateResults0_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel00_1024_s_16x4);
6020 int32x4_t intermediateResults0_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel00_1024_s_16x4);
6021 int32x4_t intermediateResults0_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel00_1024_s_16x4);
6022 int32x4_t intermediateResults0_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel00_1024_s_16x4);
6024 int32x4_t intermediateResults1_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel10_1024_s_16x4);
6025 int32x4_t intermediateResults1_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel10_1024_s_16x4);
6026 int32x4_t intermediateResults1_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel10_1024_s_16x4);
6027 int32x4_t intermediateResults1_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel10_1024_s_16x4);
6029 int32x4_t intermediateResults2_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel20_1024_s_16x4);
6030 int32x4_t intermediateResults2_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel20_1024_s_16x4);
6031 int32x4_t intermediateResults2_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel20_1024_s_16x4);
6032 int32x4_t intermediateResults2_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel20_1024_s_16x4);
6035 const int16x4_t source1_A_s_16x4 = vget_low_s16(source1_low_s_16x8);
6036 const int16x4_t source1_B_s_16x4 = vget_high_s16(source1_low_s_16x8);
6037 const int16x4_t source1_C_s_16x4 = vget_low_s16(source1_high_s_16x8);
6038 const int16x4_t source1_D_s_16x4 = vget_high_s16(source1_high_s_16x8);
6040 intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source1_A_s_16x4, factorChannel01_1024_s_16x4);
6041 intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source1_B_s_16x4, factorChannel01_1024_s_16x4);
6042 intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source1_C_s_16x4, factorChannel01_1024_s_16x4);
6043 intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source1_D_s_16x4, factorChannel01_1024_s_16x4);
6045 intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source1_A_s_16x4, factorChannel11_1024_s_16x4);
6046 intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source1_B_s_16x4, factorChannel11_1024_s_16x4);
6047 intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source1_C_s_16x4, factorChannel11_1024_s_16x4);
6048 intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source1_D_s_16x4, factorChannel11_1024_s_16x4);
6050 intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source1_A_s_16x4, factorChannel21_1024_s_16x4);
6051 intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source1_B_s_16x4, factorChannel21_1024_s_16x4);
6052 intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source1_C_s_16x4, factorChannel21_1024_s_16x4);
6053 intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source1_D_s_16x4, factorChannel21_1024_s_16x4);
6056 const int16x4_t source2_A_s_16x4 = vget_low_s16(source2_low_s_16x8);
6057 const int16x4_t source2_B_s_16x4 = vget_high_s16(source2_low_s_16x8);
6058 const int16x4_t source2_C_s_16x4 = vget_low_s16(source2_high_s_16x8);
6059 const int16x4_t source2_D_s_16x4 = vget_high_s16(source2_high_s_16x8);
6061 intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source2_A_s_16x4, factorChannel02_1024_s_16x4);
6062 intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source2_B_s_16x4, factorChannel02_1024_s_16x4);
6063 intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source2_C_s_16x4, factorChannel02_1024_s_16x4);
6064 intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source2_D_s_16x4, factorChannel02_1024_s_16x4);
6066 intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source2_A_s_16x4, factorChannel12_1024_s_16x4);
6067 intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source2_B_s_16x4, factorChannel12_1024_s_16x4);
6068 intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source2_C_s_16x4, factorChannel12_1024_s_16x4);
6069 intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source2_D_s_16x4, factorChannel12_1024_s_16x4);
6071 intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source2_A_s_16x4, factorChannel22_1024_s_16x4);
6072 intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source2_B_s_16x4, factorChannel22_1024_s_16x4);
6073 intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source2_C_s_16x4, factorChannel22_1024_s_16x4);
6074 intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source2_D_s_16x4, factorChannel22_1024_s_16x4);
6079 intermediateResults0_A_s_32x4 = vaddq_s32(intermediateResults0_A_s_32x4, biasChannel0_1024_s_32x4);
6080 intermediateResults0_B_s_32x4 = vaddq_s32(intermediateResults0_B_s_32x4, biasChannel0_1024_s_32x4);
6081 intermediateResults0_C_s_32x4 = vaddq_s32(intermediateResults0_C_s_32x4, biasChannel0_1024_s_32x4);
6082 intermediateResults0_D_s_32x4 = vaddq_s32(intermediateResults0_D_s_32x4, biasChannel0_1024_s_32x4);
6084 intermediateResults1_A_s_32x4 = vaddq_s32(intermediateResults1_A_s_32x4, biasChannel1_1024_s_32x4);
6085 intermediateResults1_B_s_32x4 = vaddq_s32(intermediateResults1_B_s_32x4, biasChannel1_1024_s_32x4);
6086 intermediateResults1_C_s_32x4 = vaddq_s32(intermediateResults1_C_s_32x4, biasChannel1_1024_s_32x4);
6087 intermediateResults1_D_s_32x4 = vaddq_s32(intermediateResults1_D_s_32x4, biasChannel1_1024_s_32x4);
6089 intermediateResults2_A_s_32x4 = vaddq_s32(intermediateResults2_A_s_32x4, biasChannel2_1024_s_32x4);
6090 intermediateResults2_B_s_32x4 = vaddq_s32(intermediateResults2_B_s_32x4, biasChannel2_1024_s_32x4);
6091 intermediateResults2_C_s_32x4 = vaddq_s32(intermediateResults2_C_s_32x4, biasChannel2_1024_s_32x4);
6092 intermediateResults2_D_s_32x4 = vaddq_s32(intermediateResults2_D_s_32x4, biasChannel2_1024_s_32x4);
6095 uint8x16x3_t results_u_8x16x3;
6098 results_u_8x16x3.val[0] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_D_s_32x4, 10))));
6100 results_u_8x16x3.val[1] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_D_s_32x4, 10))));
6101 results_u_8x16x3.val[2] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_D_s_32x4, 10))));
6104 vst3q_u8(target, results_u_8x16x3);
6107OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_128_s_16x8,
const int16x8_t& factorChannel10_128_s_16x8,
const int16x8_t& factorChannel20_128_s_16x8,
const int16x8_t& factorChannel01_128_s_16x8,
const int16x8_t& factorChannel11_128_s_16x8,
const int16x8_t& factorChannel21_128_s_16x8,
const int16x8_t& factorChannel02_128_s_16x8,
const int16x8_t& factorChannel12_128_s_16x8,
const int16x8_t& factorChannel22_128_s_16x8,
const int16x8_t& biasChannel0_128_s_16x8,
const int16x8_t& biasChannel1_128_s_16x8,
const int16x8_t& biasChannel2_128_s_16x8)
6109 ocean_assert(source !=
nullptr && target !=
nullptr);
6129 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6131 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6132 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6133 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
6135 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6136 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6137 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
6140 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_128_s_16x8);
6141 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_128_s_16x8);
6142 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_128_s_16x8);
6144 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_128_s_16x8);
6145 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_128_s_16x8);
6146 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_128_s_16x8);
6149 intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source1_low_s_16x8, factorChannel01_128_s_16x8);
6150 intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source1_low_s_16x8, factorChannel11_128_s_16x8);
6151 intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source1_low_s_16x8, factorChannel21_128_s_16x8);
6153 intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source1_high_s_16x8, factorChannel01_128_s_16x8);
6154 intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source1_high_s_16x8, factorChannel11_128_s_16x8);
6155 intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source1_high_s_16x8, factorChannel21_128_s_16x8);
6158 intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source2_low_s_16x8, factorChannel02_128_s_16x8);
6159 intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source2_low_s_16x8, factorChannel12_128_s_16x8);
6160 intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source2_low_s_16x8, factorChannel22_128_s_16x8);
6162 intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source2_high_s_16x8, factorChannel02_128_s_16x8);
6163 intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source2_high_s_16x8, factorChannel12_128_s_16x8);
6164 intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source2_high_s_16x8, factorChannel22_128_s_16x8);
6168 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, biasChannel0_128_s_16x8);
6169 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, biasChannel0_128_s_16x8);
6171 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, biasChannel1_128_s_16x8);
6172 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, biasChannel1_128_s_16x8);
6174 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, biasChannel2_128_s_16x8);
6175 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, biasChannel2_128_s_16x8);
6178 uint8x16x3_t results_u_8x16x3;
6181 results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 7));
6182 results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 7));
6183 results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 7));
6186 vst3q_u8(target, results_u_8x16x3);
6189OCEAN_FORCE_INLINE
void FrameChannels::convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const int16x8_t& factorChannel00_64_s_16x8,
const int16x8_t& factorChannel10_64_s_16x8,
const int16x8_t& factorChannel20_64_s_16x8,
const int16x8_t& factorChannel01_64_s_16x8,
const int16x8_t& factorChannel11_64_s_16x8,
const int16x8_t& factorChannel21_64_s_16x8,
const int16x8_t& factorChannel02_64_s_16x8,
const int16x8_t& factorChannel12_64_s_16x8,
const int16x8_t& factorChannel22_64_s_16x8,
const uint8x8_t& biasChannel0_u_8x8,
const uint8x8_t& biasChannel1_u_8x8,
const uint8x8_t& biasChannel2_u_8x8,
const uint8x16_t& channelValue3_u_8x16)
6191 ocean_assert(source !=
nullptr && target !=
nullptr);
6206 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6209 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6210 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6211 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
6213 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6214 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6215 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
6219 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
6220 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
6221 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
6223 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
6224 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
6225 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
6227 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8));
6228 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
6229 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
6231 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
6232 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
6233 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
6235 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
6236 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
6237 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
6239 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
6240 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
6241 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
6243 uint8x16x4_t results_u_8x16x4;
6246 results_u_8x16x4.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
6247 results_u_8x16x4.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
6248 results_u_8x16x4.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
6249 results_u_8x16x4.val[3] = channelValue3_u_8x16;
6252 vst4q_u8(target, results_u_8x16x4);
// Templated NEON kernel: reduces 8 interleaved 4-channel pixels (8 bit per channel) to 8 single-channel
// values as a weighted sum of the source channels with 7 bit precision (final rounding shift by 7).
// The boolean template flags select which source channels contribute to the sum.
// NOTE(review): the function signature line, the braces and any `else` keyword are not present in this
// excerpt; `source`, `target` and the `factorChannel*_128_u_8x8` operands are presumably function
// parameters - confirm against the declaration.
6255template <
bool tUseFactorChannel0,
bool tUseFactorChannel1,
bool tUseFactorChannel2,
bool tUseFactorChannel3>
// At least one source channel must contribute, otherwise the result would be constant zero.
6258 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3,
"Invalid multiplication factors!");
6260 ocean_assert(source !=
nullptr && target !=
nullptr);
// De-interleaving load of 32 bytes: val[0] .. val[3] each hold one channel of the 8 pixels.
6280 uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
6282 uint16x8_t intermediateResults_16x8;
// Initialization of the accumulator: widening 8x8 -> 16 bit product with the first factor when channel 0 is used.
6286 if constexpr (tUseFactorChannel0)
6288 intermediateResults_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel0_128_u_8x8);
// NOTE(review): presumably the `else` branch of the check above (zero-initialized accumulator when
// channel 0 is skipped) - the `else` keyword is not visible here, confirm.
6292 intermediateResults_16x8 = vdupq_n_u16(0u);
// Widening multiply-accumulate of channel 1, compiled in only when requested.
6297 if constexpr (tUseFactorChannel1)
6299 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[1], factorChannel1_128_u_8x8);
// Widening multiply-accumulate of channel 2, compiled in only when requested.
6304 if constexpr (tUseFactorChannel2)
6306 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[2], factorChannel2_128_u_8x8);
// Widening multiply-accumulate of channel 3, compiled in only when requested.
// NOTE(review): the 16 bit accumulation does not saturate; presumably the factors sum up to at most 128 - confirm.
6311 if constexpr (tUseFactorChannel3)
6313 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[3], factorChannel3_128_u_8x8);
// Rounding shift right by 7 bits (division by 128) with saturating narrowing to 8 bit.
6317 uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_16x8, 7);
// Stores the 8 resulting single-channel values (8 bytes).
6320 vst1_u8(target, results_u_8x8);
6323OCEAN_FORCE_INLINE
void FrameChannels::convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(
const uint8_t*
const source, uint8_t*
const target,
const uint8x8_t& factorChannel00_128_u_8x8,
const uint8x8_t& factorChannel10_128_u_8x8,
const uint8x8_t& factorChannel01_128_u_8x8,
const uint8x8_t& factorChannel11_128_u_8x8,
const uint8x8_t& factorChannel02_128_u_8x8,
const uint8x8_t& factorChannel12_128_u_8x8,
const uint8x8_t& factorChannel03_128_u_8x8,
const uint8x8_t& factorChannel13_128_u_8x8)
6325 ocean_assert(source !=
nullptr && target !=
nullptr);
6347 uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
6349 uint16x8_t intermediateResultsChannel0_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel00_128_u_8x8);
6350 uint16x8_t intermediateResultsChannel1_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel10_128_u_8x8);
6352 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[1], factorChannel01_128_u_8x8);
6353 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[1], factorChannel11_128_u_8x8);
6355 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[2], factorChannel02_128_u_8x8);
6356 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[2], factorChannel12_128_u_8x8);
6358 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[3], factorChannel03_128_u_8x8);
6359 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[3], factorChannel13_128_u_8x8);
6361 uint8x8x2_t results_u_8x8x2;
6365 results_u_8x8x2.val[0] = vqrshrn_n_u16(intermediateResultsChannel0_16x8, 7);
6366 results_u_8x8x2.val[1] = vqrshrn_n_u16(intermediateResultsChannel1_16x8, 7);
6369 vst2_u8(target, results_u_8x8x2);
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition FrameChannels.h:51
static bool premultipliedAlphaToStraightAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool zipChannels(const Frames &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool separateTo1Channel(const Frame &sourceFrame, Frames &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool premultipliedAlphaToStraightAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool separateTo1Channel(const Frame &sourceFrame, const std::initializer_list< Frame * > &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool zipChannels(const std::initializer_list< Frame > &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool straightAlphaToPremultipliedAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
static bool straightAlphaToPremultipliedAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
This class implements frame channel conversion, transformation and extraction functions.
Definition FrameChannels.h:31
static void reverseChannelOrder(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Reverses the order of the channels of a frame with zipped pixel format.
Definition FrameChannels.h:2840
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_1024_s_16x8, const __m128i &factorChannel10_1024_s_16x8, const __m128i &factorChannel20_1024_s_16x8, const __m128i &factorChannel01_1024_s_16x8, const __m128i &factorChannel11_1024_s_16x8, const __m128i &factorChannel21_1024_s_16x8, const __m128i &factorChannel02_1024_s_16x8, const __m128i &factorChannel12_1024_s_16x8, const __m128i &factorChannel22_1024_s_16x8, const __m128i &biasChannel0_1024_s_32x4, const __m128i &biasChannel1_1024_s_32x4, const __m128i &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5341
static void addChannelValueRow(const T *source, T *target, const size_t size, const void *channelValueParameter)
Adds a channel to a given row with generic (zipped) pixel format and sets all values to a specified v...
Definition FrameChannels.h:4289
static void shuffleRowChannelsAndSetLastChannelValue(const T *source, T *target, const size_t size, const void *options=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern and sets the last c...
Definition FrameChannels.h:3747
static void separateTo1Channel(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition FrameChannels.h:1847
static OCEAN_FORCE_INLINE void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8, const uint8x16_t &channelValue3_u_8x16)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 4 channels per pixel by a linear combi...
Definition FrameChannels.h:6189
static void addChannelRow(const void **sources, void **targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void *options)
Adds a channel to a given row with generic (zipped) pixel format and copies the information of the ne...
Definition FrameChannels.h:4189
static void shuffleChannelsAndSetLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of source frame and sets the last channel with constant value in the target fra...
Definition FrameChannels.h:3910
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0_128_u_16x8, const __m128i &multiplicationFactors1_128_u_16x8, const __m128i &multiplicationFactors2_128_u_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition FrameChannels.h:5187
static void shuffleChannels(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of a frame by an arbitrary pattern.
Definition FrameChannels.h:3882
static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 3 channels to pixels with one channel by a linear combination of the fo...
Definition FrameChannels.h:4877
static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with one channel by a linear combination of the fo...
Definition FrameChannels.h:4950
static void copyChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Copies one channel from a given frame with zipped pixel format to another frame with zipped pixel for...
Definition FrameChannels.h:2799
static void zipChannels(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition FrameChannels.h:2598
static void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition FrameChannels.h:4092
static void applyRowOperator(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > &rowOperatorFunction, Worker *worker=nullptr)
Applies a row operator to all rows of a source image.
Definition FrameChannels.h:4007
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5768
static void convertRow4ChannelsTo2Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *multiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with two channels by a linear combination of the fo...
static void setChannelSubset(T *frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Sets one channel of a frame with one unique value.
Definition FrameChannels.h:4488
static void applyBivariateOperatorSubset(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Generic bivariate pixel operations.
Definition FrameChannels.h:4721
static void applyAdvancedPixelModifier(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:3969
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition FrameChannels.h:5714
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear com...
Definition FrameChannels.h:5981
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8, const uint8x8_t &factorChannel3_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static void addFirstChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition FrameChannels.h:2711
static void addLastChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the ba...
Definition FrameChannels.h:2731
static void removeFirstChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the first channel from a given frame with zipped (generic) pixel format.
Definition FrameChannels.h:2767
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition FrameChannels.h:5890
static void addLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition FrameChannels.h:2747
static void convertRow3ChannelsTo3Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void reverseRowPixelOrderInPlace(T *data, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general) in place.
Definition FrameChannels.h:3017
static void applyRowOperatorSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows)
Applies a row operator to a subset of all rows of a source image.
Definition FrameChannels.h:4854
static void applyPixelModifier(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:3954
static void narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const size_t size, const void *unusedParameters=nullptr)
Narrows a row of pixels with 16 bit channels to pixels with 8 bit channels.
Definition FrameChannels.h:4130
static void applyAdvancedPixelModifierSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:4615
static void convertRow4ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 4 channels to pixels with 3 channels by a linear combination of the fou...
static void shuffleRowChannels(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern.
Definition FrameChannels.h:3387
static void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition FrameChannels.h:4054
static void convertRow3ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 3 channels per pixel by a linear combi...
Definition FrameChannels.h:6107
static void convertRow3ChannelsTo4Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 4 channels by a linear combination of the thr...
static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
Definition of a constant to specify that the number of channels are not known at compile time but at ...
Definition FrameChannels.h:37
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition FrameChannels.h:5833
static void copyChannelRow(const T *source, T *target, const size_t size, const void *unusedParameters=nullptr)
Copies one channel from a source row to a target row with generic (zipped) pixel format.
Definition FrameChannels.h:4328
static void reverseRowPixelOrder(const T *source, T *target, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general).
Definition FrameChannels.h:2856
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0123_128_s_32x)
Converts 16 pixels with 4 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition FrameChannels.h:5478
static void removeLastChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the last channel from a given frame with zipped (generic) pixel format.
Definition FrameChannels.h:2783
static void transformGeneric(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker)
Transforms a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24,...
Definition FrameChannels.h:4029
static void setChannel(T *frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker *worker=nullptr)
Sets one channel of a frame with a specific unique value.
Definition FrameChannels.h:2821
static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition FrameChannels.h:5114
static void narrow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Narrows 16 bit channels of a frame to 8 bit channels.
Definition FrameChannels.h:3938
static void transformGenericSubset(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const RowReversePixelOrderFunction< void > rowReversePixelOrderFunction, const unsigned int bytesPerRow, const unsigned int sourceStrideBytes, const unsigned int targetStrideBytes, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a frame with generic pixel format (with zipped pixel information) like RGB24 o...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_128_s_16x8, const __m128i &factorChannel10_128_s_16x8, const __m128i &factorChannel20_128_s_16x8, const __m128i &factorChannel01_128_s_16x8, const __m128i &factorChannel11_128_s_16x8, const __m128i &factorChannel21_128_s_16x8, const __m128i &factorChannel02_128_s_16x8, const __m128i &factorChannel12_128_s_16x8, const __m128i &factorChannel22_128_s_16x8, const __m128i &biasChannel0_s_16x8, const __m128i &biasChannel1_s_16x8, const __m128i &biasChannel2_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5253
static void reverseRowChannelOrder(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Reverses/mirrors the order of channels in a given row (or a memory block in general).
Definition FrameChannels.h:3195
static void convertRow3ChannelsTo3Channels8BitPerChannel10BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void applyBivariateOperator(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Generic bivariate pixel operations. Applies bivariate per-pixel operators: C(y, x) = op(A(y,...
Definition FrameChannels.h:3988
static void addFirstChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the fr...
Definition FrameChannels.h:2695
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel00_128_u_8x8, const uint8x8_t &factorChannel10_128_u_8x8, const uint8x8_t &factorChannel01_128_u_8x8, const uint8x8_t &factorChannel11_128_u_8x8, const uint8x8_t &factorChannel02_128_u_8x8, const uint8x8_t &factorChannel12_128_u_8x8, const uint8x8_t &factorChannel03_128_u_8x8, const uint8x8_t &factorChannel13_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with two channels per pixel by a linear combi...
Definition FrameChannels.h:6323
static void separateTo1ChannelRuntime(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition FrameChannels.h:4349
static void zipChannelsRuntime(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition FrameChannels.h:4422
void(*)(const TSource *sourceRow, TTarget *targetRow, const unsigned int width, const unsigned int height, unsigned int rowIndex, const unsigned int sourceStrideElements, const unsigned int targetStrideElements) RowOperatorFunction
Definition of a function pointer to a function able to operate on an entire image row.
Definition FrameChannels.h:43
static void applyPixelModifierSubset(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:4511
static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition FrameChannels.h:5025
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i &multiplicationFactorsChannel1_0123_128_s_16x8)
Converts 16 pixels with 4 channels per pixel to 16 pixels with two channels per pixel by a linear comb...
Definition FrameChannels.h:5541
This is the base class for all frame converter classes.
Definition FrameConverter.h:32
ConversionFlag
Definition of individual conversion flags.
Definition FrameConverter.h:39
@ CONVERT_NORMAL
Normal conversion, neither flips nor mirrors the image.
Definition FrameConverter.h:49
@ CONVERT_FLIPPED_AND_MIRRORED
Rotated conversion, rotates the image by 180.0 degrees with anchor in the center of the image.
Definition FrameConverter.h:82
@ CONVERT_MIRRORED
Mirrored conversion, exchanges left and right of the image (like in a mirror, mirroring around the y-...
Definition FrameConverter.h:71
@ CONVERT_FLIPPED
Flipped conversion, exchanges top and bottom of the image (flipping around the x-axis).
Definition FrameConverter.h:60
static void convertGenericPixelFormat(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const ConversionFlag flag, const RowConversionFunction< TSource, TTarget > rowConversionFunction, const RowReversePixelOrderInPlaceFunction< TTarget > targetReversePixelOrderInPlaceFunction, const bool areContinuous, const void *options, Worker *worker)
Converts a frame with generic pixel format (e.g., RGBA32, BGR24, YUV24, ...) to a frame with generic ...
Definition FrameConverter.h:3225
void(*)(T *row, const size_t width) RowReversePixelOrderInPlaceFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition FrameConverter.h:603
void(*)(const T *inputRow, T *targetRow, const size_t width) RowReversePixelOrderFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition FrameConverter.h:594
static void convertArbitraryPixelFormat(const void **sources, void **targets, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int multipleRowsPerIteration, const MultipleRowsConversionFunction multipleRowsConversionFunction, const void *options, Worker *worker)
Converts a frame with arbitrary pixel format (e.g., Y_UV12, Y_VU12, YUYV16, ...) to a frame with arbi...
Definition FrameConverter.h:3248
static OCEAN_FORCE_INLINE uint8x16_t cast16ElementsNEON(const float32x4_t &sourceA_f_32x4, const float32x4_t &sourceB_f_32x4, const float32x4_t &sourceC_f_32x4, const float32x4_t &sourceD_f_32x4)
Casts 16 float elements to 16 uint8_t elements.
Definition NEON.h:1208
static __m128i divideByRightShiftSigned32Bit(const __m128i &value, const unsigned int rightShifts)
Divides four signed 32 bit values by applying a right shift.
Definition SSE.h:3108
static __m128i load128i(const void *const buffer)
Loads a 128i value from the memory.
Definition SSE.h:3619
static void store128i(const __m128i &value, uint8_t *const buffer)
Stores a 128i value to the memory.
Definition SSE.h:3764
static __m128i divideByRightShiftSigned16Bit(const __m128i &value, const unsigned int rightShifts)
Divides eight signed 16 bit values by applying a right shift.
Definition SSE.h:3066
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8AndAccumulate(const __m128i &values0, const __m128i &values1, __m128i &results0, __m128i &results1)
Multiplies 8 int16_t values with 8 int16_t values and adds the products to 8 int32_t values.
Definition SSE.h:3909
static OCEAN_FORCE_INLINE void interleave3Channel8Bit48Elements(const __m128i &channel0, const __m128i &channel1, const __m128i &channel2, __m128i &interleavedA, __m128i &interleavedB, __m128i &interleavedC)
Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition SSE.h:3345
static OCEAN_FORCE_INLINE void reverseChannelOrder3Channel8Bit48Elements(const __m128i &interleaved0, const __m128i &interleaved1, const __m128i &interleaved2, __m128i &reversedInterleaved0, __m128i &reversedInterleaved1, __m128i &reversedInterleaved2)
Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channe...
Definition SSE.h:3387
static __m128i removeHighBits16_8(const __m128i &value)
Removes the higher 8 bits of eight 16 bit elements.
Definition SSE.h:3799
static OCEAN_FORCE_INLINE void deInterleave3Channel8Bit48Elements(const __m128i &interleavedA, const __m128i &interleavedB, const __m128i &interleavedC, __m128i &channel0, __m128i &channel1, __m128i &channel2)
Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition SSE.h:3304
static __m128i set128i(const unsigned long long high64, const unsigned long long low64)
Sets a 128i value by two 64 bit values.
Definition SSE.h:3770
static OCEAN_FORCE_INLINE void reverseChannelOrder4Channel8Bit64Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (64 elements) of an image with 4 interleaved channels...
Definition SSE.h:3412
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8(const __m128i &values0, const __m128i &values1, __m128i &products0, __m128i &products1)
Multiplies 8 int16_t values with 8 int16_t values and returns the products as 8 int32_t results.
Definition SSE.h:3900
static OCEAN_FORCE_INLINE void reverseChannelOrder2Channel8Bit32Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (32 elements) of an image with 2 interleaved channels...
Definition SSE.h:3372
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition Caller.h:2877
This class implements Ocean's image class.
Definition Frame.h:1808
PixelFormat
Definition of all pixel formats available in the Ocean framework.
Definition Frame.h:183
TypeMapperBySize< sizeof(T)>::Type Type
Definition of an invalid mapped data type.
Definition DataType.h:508
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
std::vector< Frame > Frames
Definition of a vector holding frames.
Definition Frame.h:1771
std::vector< Index32 > Indices32
Definition of a vector holding 32 bit index values.
Definition Base.h:96
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Default definition of a type with tBytes bytes.
Definition DataType.h:32