This class implements computer vision functions using SSE extensions. More...

Data Structures
union	M128
	This union defines a wrapper for the __m128 SSE intrinsic data type. More...

union	M128d
	This union defines a wrapper for the __m128d SSE intrinsic data type. More...

union	M128i
	This union defines a wrapper for the __m128i SSE intrinsic data type. More...

Static Public Member Functions
static void	prefetchT0 (const void *const data)
	Prefetches a block of temporal memory into all cache levels. More...

static void	prefetchT1 (const void *const data)
	Prefetches a block of temporal memory in all cache levels except 0th cache level. More...

static void	prefetchT2 (const void *const data)
	Prefetches a block of temporal memory in all cache levels, except 0th and 1st cache levels. More...

static void	prefetchNTA (const void *const data)
	Prefetches a block of non-temporal memory into non-temporal cache structure. More...

template<unsigned int tIndex>
static uint8_t	value_u8 (const __m128i &value)
	Returns one specific 8 bit unsigned integer value of a m128i value object. More...

static uint8_t	value_u8 (const __m128i &value, const unsigned int index)
	Returns one specific 8 bit unsigned integer value of a m128i value object. More...

template<unsigned int tIndex>
static uint16_t	value_u16 (const __m128i &value)
	Returns one specific 16 bit unsigned integer value of a m128i value object. More...

template<unsigned int tIndex>
static unsigned int	value_u32 (const __m128i &value)
	Returns one specific 32 bit unsigned integer value of a m128i value object. More...

static OCEAN_FORCE_INLINE unsigned int	sum_u32_4 (const __m128i &value)
	Adds the four (all four) individual 32 bit unsigned integer values of a m128i value and returns the result. More...

static unsigned int	sum_u32_first_2 (const __m128i &value)
	Adds the first two individual 32 bit unsigned integer values of a m128i value and returns the result. More...

static unsigned int	sum_u32_first_third (const __m128i &value)
	Adds the first and the third 32 bit unsigned integer values of a m128i value and returns the result. More...

static OCEAN_FORCE_INLINE float	sum_f32_4 (const __m128 &value)
	Adds the four (all four) individual 32 bit float of a m128 value and returns the result. More...

static OCEAN_FORCE_INLINE double	sum_f64_2 (const __m128d &value)
	Adds the two (all two) individual 64 bit float values of a m128d value and returns the result. More...

static __m128i	sumSquareDifferences8BitBack11Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square differences determination for the last 11 elements of a 16 elements buffer with 8 bit precision. More...

static __m128i	sumSquareDifference8BitFront12Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for the first 12 elements of a 16 elements buffer with 8 bit precision, the remaining 4 elements are set to zero. More...

static __m128i	sumSquareDifference8BitBack12Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for the last 12 elements of a 16 elements buffer with 8 bit precision, the beginning 4 elements are interpreted as zero. More...

template<bool tBufferHas16Bytes>
static __m128i	sumSquareDifference8BitFront13Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for the first 13 elements of a buffer with 8 bit precision. More...

static __m128i	sumSquareDifference8BitBack13Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for the last 13 elements of a 16 elements buffer with 8 bit precision, the beginning 3 elements are interpreted as zero. More...

template<bool tBufferHas16Bytes>
static __m128i	sumSquareDifference8BitFront15Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for the first 15 elements of a buffer with 8 bit precision. More...

static __m128i	sumSquareDifference8Bit16Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for 16 elements with 8 bit precision. More...

static __m128i	sumSquareDifference8Bit16ElementsAligned16 (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for 16 elements with 8 bit precision. More...

static __m128i	sumSquareDifference8Bit16Elements (const __m128i &row0, const __m128i &row1)
	Sum square difference determination for 16 elements with 8 bit precision. More...

static void	average8Elements1Channel32Bit2x2 (const float *const image0, const float *const image1, float *const result)
	Averages 8 elements of 2x2 blocks for 1 channel 32 bit frames. More...

static void	average8Elements1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
	Averages 8 elements of 2x2 blocks for 1 channel 8 bit frames. More...

static void	average8ElementsBinary1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result, const uint16_t threshold=776u)
	Averages 8 elements of 2x2 blocks for 1 binary (0x00 or 0xFF) frames. More...

static void	average16Elements1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
	Averages 16 elements of 2x2 blocks for 1 channel 8 bit frames. More...

static void	average16ElementsBinary1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result, const uint16_t threshold=776u)
	Averages 16 elements of 2x2 blocks for 1 binary (0x00 or 0xFF) frames. More...

static void	average32Elements1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
	Averages 32 elements of 2x2 blocks for 1 channel 8 bit frames. More...

static void	average32ElementsBinary1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result, const uint16_t threshold=776u)
	Averages 32 elements of 2x2 blocks for 1 binary (0x00 or 0xFF) frames. More...

static void	average8Elements2Channel16Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
	Averages 8 elements of 2x2 blocks for 2 channel 16 bit frames. More...

static void	average8Elements2Channel64Bit2x2 (const float *const image0, const float *const image1, float *const result)
	Averages 8 elements of 2x2 blocks for 2 channel 64 bit frames. More...

static void	average16Elements2Channel16Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
	Averages 16 elements of 2x2 blocks for 2 channel 16 bit frames. More...

static void	average32Elements2Channel16Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
	Averages 32 elements of 2x2 blocks for 2 channel 16 bit frames. More...

static void	average6Elements3Channel96Bit2x2 (const float *const image0, const float *const image1, float *const result)
	Averages 6 elements of 2x2 blocks for 3 channel 96 bit frames. More...

static void	average24Elements3Channel24Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
	Averages 24 elements of 2x2 blocks for 3 channel 24 bit frames. More...

static void	average8Elements4Channel128Bit2x2 (const float *const image0, const float *const image1, float *const result)
	Averages 8 elements of 2x2 blocks for 4 channel 128 bit frames. More...

static void	average16Elements4Channel32Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
	Averages 16 elements of 2x2 blocks for 4 channel 32 bit frames. More...

static void	average32Elements4Channel32Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
	Averages 32 elements of 2x2 blocks for 4 channel 32 bit frames. More...

static void	average30Elements1Channel8Bit3x3 (const uint8_t *const image0, const uint8_t *const image1, const uint8_t *const image2, uint8_t *const result)
	Averages 30 elements of 3x3 blocks for 1 channel 8 bit frames. More...

static __m128i	addOffsetBeforeRightShiftDivisionByTwoSigned16Bit (const __m128i &value)
	Adds 1 to each signed 16 bit value which is both, negative and odd, so that each value can be right shifted by one bit to allow a correct division by two. More...

static __m128i	addOffsetBeforeRightShiftDivisionSigned16Bit (const __m128i &value, const unsigned int rightShifts)
	Adds 2^shifts - 1 to each negative signed 16 bit value, so that each value can be right shifted to allow a correct division by 2^shifts. More...

static __m128i	divideByRightShiftSigned16Bit (const __m128i &value, const unsigned int rightShifts)
	Divides eight signed 16 bit values by applying a right shift. More...

static __m128i	addOffsetBeforeRightShiftDivisionByTwoSigned32Bit (const __m128i &value)
	Adds 1 to each signed 32 bit value which is both, negative and odd, so that each value can be right shifted by one bit to allow a correct division by two. More...

static __m128i	addOffsetBeforeRightShiftDivisionSigned32Bit (const __m128i &value, const unsigned int rightShifts)
	Adds 2^shifts - 1 to each negative signed 32 bit value, so that each value can be right shifted to allow a correct division by 2^shifts. More...

static __m128i	divideByRightShiftSigned32Bit (const __m128i &value, const unsigned int rightShifts)
	Divides four signed 32 bit values by applying a right shift. More...

static void	gradientHorizontalVertical8Elements1Channel8Bit (const uint8_t *source, int8_t *response, const unsigned int width)
	Determines the horizontal and the vertical gradients for 16 following pixels for a given 1 channel 8 bit frame. More...

static void	gradientHorizontalVertical8Elements3Products1Channel8Bit (const uint8_t *source, int16_t *response, const unsigned int width)
	Determines the squared horizontal and vertical gradients and the product of both gradients for 16 following pixels for a given 1 channel 8 bit frame. More...

static __m128i	sumAbsoluteDifferences8BitBack11Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum absolute differences determination for the last 11 elements of a 16 elements buffer with 8 bit precision. More...

template<bool tBufferHas16Bytes>
static __m128i	sumAbsoluteDifferences8BitFront10Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum absolute differences determination for the first 10 elements of a buffer with 8 bit precision. More...

template<bool tBufferHas16Bytes>
static __m128i	sumAbsoluteDifferences8BitFront15Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum absolute differences determination for the first 15 elements of a buffer with 8 bit precision. More...

static __m128i	interpolation1Channel8Bit8Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy)
	Interpolates 8 elements of 2x2 blocks for 1 channel 8 bit frames. More...

static __m128i	interpolation2Channel16Bit8Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy)
	Interpolates 8 elements of 2x2 blocks for 2 channel 16 bit frames. More...

static __m128i	interpolation3Channel24Bit8Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy)
	Interpolates 8 elements of 2x2 blocks for 3 channel 24 bit frames. More...

static __m128i	interpolation1Channel8Bit15Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_fxfy_, const __m128i &fx_fyfxfy)
	Interpolates 15 elements of 2x2 blocks for 1 channel 8 bit frames. More...

static __m128i	interpolation3Channel24Bit12Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_fxfy_, const __m128i &fx_fyfxfy)
	Interpolates 12 elements of 2x2 blocks for 3 channel 24 bit frames. More...

static __m128i	interpolation4Channel32Bit8Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy)
	Interpolates 8 elements of 2x2 blocks for 4 channel 32 bit frames. More...

static __m128i	interpolation4Channel32Bit2x4Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy)
	Interpolates 2x4 elements (two separated blocks of 4 elements) of 2x2 blocks for 4 channel 32 bit frames. More...

static unsigned int	ssd2Channel16Bit1x1 (const uint8_t *const pixel0, const uint8_t *const pixel1, const unsigned int size0, const unsigned int size1, const unsigned int f1x_y_, const unsigned int f1xy_, const unsigned int f1x_y, const unsigned int f1xy)
	Returns the interpolated sum of square difference for one 2 channel 16 bit pixel. More...

static unsigned int	ssd2Channel16Bit1x1 (const uint8_t *const pixel0, const uint8_t *const pixel1, const unsigned int size0, const unsigned int size1, const unsigned int f0x_y_, const unsigned int f0xy_, const unsigned int f0x_y, const unsigned int f0xy, const unsigned int f1x_y_, const unsigned int f1xy_, const unsigned int f1x_y, const unsigned int f1xy)
	Returns the interpolated sum of square difference for one 2 channel 16 bit pixel. More...

static __m128i	sumAbsoluteDifferences8Bit16Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum absolute differences determination for 16 elements of a 16 elements buffer with 8 bit precision. More...

static OCEAN_FORCE_INLINE void	deInterleave3Channel8Bit15Elements (const __m128i &interleaved, __m128i &channel01, __m128i &channel2)
	Deinterleaves 15 elements of e.g., an image with 3 channels and 8 bit per element. More...

static OCEAN_FORCE_INLINE void	deInterleave3Channel8Bit24Elements (const __m128i &interleavedA, const __m128i &interleavedB, __m128i &channel01, __m128i &channel2)
	Deinterleaves 24 elements of e.g., an image with 3 channels and 8 bit per element. More...

static OCEAN_FORCE_INLINE void	deInterleave3Channel8Bit48Elements (const __m128i &interleavedA, const __m128i &interleavedB, const __m128i &interleavedC, __m128i &channel0, __m128i &channel1, __m128i &channel2)
	Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element. More...

static void	deInterleave3Channel8Bit48Elements (const uint8_t *interleaved, __m128i &channel0, __m128i &channel1, __m128i &channel2)
	Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element. More...

static void	deInterleave3Channel8Bit48Elements (const uint8_t *interleaved, uint8_t *channel0, uint8_t *channel1, uint8_t *channel2)
	Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element. More...

static void	deInterleave3Channel8Bit45Elements (const uint8_t *interleaved, __m128i &channel0, __m128i &channel1, __m128i &channel2)
	Deinterleaves 45 elements of e.g., an image with 3 channels and 8 bit per element. More...

static OCEAN_FORCE_INLINE void	interleave3Channel8Bit48Elements (const __m128i &channel0, const __m128i &channel1, const __m128i &channel2, __m128i &interleavedA, __m128i &interleavedB, __m128i &interleavedC)
	Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element. More...

static OCEAN_FORCE_INLINE void	interleave3Channel8Bit48Elements (const uint8_t *const channel0, const uint8_t *const channel1, const uint8_t *const channel2, uint8_t *const interleaved)
	Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element. More...

static OCEAN_FORCE_INLINE void	reverseChannelOrder2Channel8Bit32Elements (const uint8_t *interleaved, uint8_t *reversedInterleaved)
	Reverses the order of the channels of 16 pixels (32 elements) of an image with 2 interleaved channels and 8 bit per element (e.g., YA16 to AY16). More...

static OCEAN_FORCE_INLINE void	reverseChannelOrder3Channel8Bit48Elements (const __m128i &interleaved0, const __m128i &interleaved1, const __m128i &interleaved2, __m128i &reversedInterleaved0, __m128i &reversedInterleaved1, __m128i &reversedInterleaved2)
	Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channels and 8 bit per element. More...

static OCEAN_FORCE_INLINE void	reverseChannelOrder3Channel8Bit48Elements (const uint8_t *interleaved, uint8_t *reversedInterleaved)
	Reverses the order of the first and last channel of 48 elements (16 pixels) of an image with 3 interleaved channels and 8 bit per element (e.g., RGB24 to BGR24). More...

static OCEAN_FORCE_INLINE void	reverseChannelOrder4Channel8Bit64Elements (const uint8_t *interleaved, uint8_t *reversedInterleaved)
	Reverses the order of the channels of 16 pixels (64 elements) of an image with 4 interleaved channels and 8 bit per element (e.g., RGBA32 to ABGR32). More...

static void	reverseChannelOrder3Channel8Bit48Elements (uint8_t *interleaved)
	Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channels and 8 bit per element (in place). More...

static void	swapReversedChannelOrder3Channel8Bit48Elements (uint8_t *first, uint8_t *second)
	Reverses the order of the first and last channel of two sets of 48 elements of an image with 3 interleaved channels and 8 bit per element and further swaps both sets. More...

static void	reverseElements8Bit48Elements (const __m128i &elements0, const __m128i &elements1, const __m128i &elements2, __m128i &reversedElements0, __m128i &reversedElements1, __m128i &reversedElements2)
	Reverses the order of 48 elements with 8 bit per element. More...

static void	reverseElements8Bit48Elements (const uint8_t *elements, uint8_t *reversedElements)
	Reverses the order of 48 elements with 8 bit per element. More...

static void	reverseElements8Bit48Elements (uint8_t *elements)
	Reverses the order of 48 elements with 8 bit per element (in place). More...

static void	swapReversedElements8Bit48Elements (uint8_t *first, uint8_t *second)
	Reverses the order of two sets of 48 elements with 8 bit per element and further swaps both sets. More...

static void	shiftChannelToFront4Channel32Bit (const uint8_t *elements, uint8_t *shiftedElements)
	Shifts the channels of a 4 channel 32 bit pixels to the front and moves the front channel to the back channel. More...

static void	shiftAndMirrorChannelToFront4Channel32Bit (const uint8_t *elements, uint8_t *shiftedElements)
	Shifts the channels of a 4 channel 32 bit pixels to the front and moves the front channel to the back channel and mirrors the four individual pixels. More...

static void	shiftChannelToBack4Channel32Bit (const uint8_t *elements, uint8_t *shiftedElements)
	Shifts the channels of a 4 channel 32 bit pixels to the back and moves the back channel to the front channel. More...

static void	shiftAndMirrorChannelToBack4Channel32Bit (const uint8_t *elements, uint8_t *shiftedElements)
	Shifts the channels of a 4 channel 32 bit pixels to the back and moves the back channel to the front channel and mirrors the four individual pixels. More...

static __m128i	sum1Channel8Bit16Elements (const __m128i &elements)
	Sums 16 elements with 8 bit per element. More...

static __m128i	sum1Channel8Bit16Elements (const uint8_t *elements)
	Sums 16 elements with 8 bit per element. More...

template<bool tBufferHas16Bytes>
static __m128i	sum1Channel8BitFront15Elements (const uint8_t *elements)
	Sums the first 15 elements of a buffer with 8 bit per element. More...

static __m128i	sum1Channel8BitBack15Elements (const uint8_t *elements)
	Sums the last 15 elements of a 16 elements buffer with 8 bit per element, the beginning 1 element is interpreted as zero. More...

static __m128i	sumInterleave3Channel8Bit48Elements (const __m128i &interleaved0, const __m128i &interleaved1, const __m128i &interleaved2)
	Sums 16 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element. More...

static __m128i	sumInterleave3Channel8Bit48Elements (const uint8_t *interleaved)
	Sums 16 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element. More...

static __m128i	sumInterleave3Channel8Bit45Elements (const uint8_t *interleaved)
	Sums 15 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element. More...

static __m128i	load128iLower64 (const void *const buffer)
	Loads the lower 64 bit of a 128i value from the memory. More...

static __m128i	load128i (const void *const buffer)
	Loads a 128i value from the memory. More...

template<bool tBufferHas16Bytes>
static __m128i	load_u8_10_upper_zero (const uint8_t *const buffer)
	Loads 10 bytes from memory, which holds either at least 16 bytes or exactly 10 bytes, to a 128i value and sets the remaining bytes of the resulting 128i value to zero. More...

template<bool tBufferHas16Bytes>
static __m128i	load_u8_15_upper_zero (const uint8_t *const buffer)
	Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value and sets the remaining byte of the resulting 128i value to zero. More...

template<bool tBufferHas16Bytes>
static __m128i	load_u8_13_lower_random (const uint8_t *const buffer)
	Loads 13 bytes from memory, which holds either at least 16 bytes or exactly 13 bytes, to a 128i value while the remaining bytes of the resulting 128i value will be random. More...

template<bool tBufferHas16Bytes>
static __m128i	load_u8_15_lower_zero (const uint8_t *const buffer)
	Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value and sets the remaining byte of the resulting 128i value to zero. More...

template<bool tBufferHas16Bytes>
static __m128i	load_u8_15_lower_random (const uint8_t *const buffer)
	Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value while the remaining byte of the resulting 128i value will be random. More...

template<unsigned int tShiftBytes>
static __m128i	load_u8_16_and_shift_right (const uint8_t *const buffer)
	Loads 16 bytes from memory which is at least 16 bytes large and shifts the 128i value by a specified number of bytes to the right (by inserting zeros). More...

static void	store128i (const __m128i &value, uint8_t *const buffer)
	Stores a 128i value to the memory. More...

static __m128i	set128i (const unsigned long long high64, const unsigned long long low64)
	Sets a 128i value by two 64 bit values. More...

static __m128i	removeHighBits32_16 (const __m128i &value)
	Removes the higher 16 bits of four 32 bit elements. More...

static __m128i	removeLowBits32_16 (const __m128i &value)
	Removes the lower 16 bits of four 32 bit elements. More...

static __m128i	removeHighBits16_8 (const __m128i &value)
	Removes the higher 8 bits of eight 16 bit elements. More...

static __m128i	removeHighBits16_8_7_lower (const __m128i &value)
	Removes the higher 8 bits of eight 16 bit elements and sets the upper two bytes to zero. More...

static __m128i	removeHighBits16_8_7_upper (const __m128i &value)
	Removes the higher 8 bits of eight 16 bit elements and sets the lower two bytes to zero. More...

static __m128i	moveLowBits16_8ToLow64 (const __m128i &value)
	Moves the lower 8 bits of eight 16 bit elements to the lower 64 bits and fills the high 64 bits with 0. More...

static __m128i	moveLowBits32_8ToLow32 (const __m128i &value)
	Moves the lower 8 bits of four 32 bit elements to the lower 32 bits and fills the high 96 bits with 0. More...

static __m128i	moveLowBits32_16ToLow64 (const __m128i &value)
	Moves the lower 16 bits of four 32 bit elements to the lower 64 bits and fills the high 64 bits with 0. More...

static __m128i	moveLowBits16_8ToHigh64 (const __m128i &value)
	Moves the lower 8 bits of eight 16 bit elements to the higher 64 bits and fills the low 64 bits with 0. More...

static __m128i	moveHighBits32_16 (const __m128i &value)
	Moves the higher 16 bits of four 32 bit elements to the lower 16 bits and fills the high bits with 0. More...

static __m128i	moveHighBits16_8 (const __m128i &value)
	Moves the higher 8 bits of eight 16 bit elements to the lower 8 bits and fills the high bits with 0. More...

static __m128i	moveHighBits16_8_5 (const __m128i &value)
	Moves the higher 8 bits of five 16 bit elements to the lower 8 bits and fills the high bits with 0. More...

static __m128i	moveHighBits16_8_6 (const __m128i &value)
	Moves the higher 8 bits of six 16 bit elements to the lower 8 bits and fills the high bits with 0. More...

static __m128i	moveHighBits16_8_7 (const __m128i &value)
	Moves the higher 8 bits of seven 16 bit elements to the lower 8 bits and fills the high bits with 0. More...

static __m128i	shuffleLow32ToLow32_8 (const __m128i &value)
	Shuffles the lower four 8 bits to the low 8 bits of four 32 bit elements. More...

static __m128i	shuffleNeighbor4Low64BitsToLow16_8 (const __m128i &value)
	Shuffles pairs of four neighbors of the low 64 bits to the low 8 bits of eight 16 bit elements. More...

static __m128i	shuffleNeighbor4High64BitsToLow16_8 (const __m128i &value)
	Shuffles pairs of four neighbors of the high 64 bits to the low 8 bits of eight 16 bit elements. More...

static __m128i	shuffleNeighbor2Low64BitsToLow16_8 (const __m128i &value)
	Shuffles pairs of two neighbors of the low 64 bits to the low 8 bits of eight 16 bit elements. More...

static __m128i	shuffleNeighbor2High64BitsToLow16_8 (const __m128i &value)
	Shuffles pairs of two neighbors of the high 64 bits to the low 8 bits of eight 16 bit elements. More...

static __m128i	bitMaskRemoveHigh16_8 ()
	Returns the following 128 bit mask: 0x00FF00FF-00FF00FF-00FF00FF-00FF00FF. More...

static __m128i	bitMaskRemoveHigh32_16 ()
	Returns the following 128 bit mask: 0x0000FFFF-0000FFFF-0000FFFF-0000FFFF. More...

static OCEAN_FORCE_INLINE void	multiplyInt8x16ToInt32x8 (const __m128i &values0, const __m128i &values1, __m128i &products0, __m128i &products1)
	Multiplies 8 int16_t values with 8 int16_t values and returns the products as 8 int32_t results. More...

static OCEAN_FORCE_INLINE void	multiplyInt8x16ToInt32x8AndAccumulate (const __m128i &values0, const __m128i &values1, __m128i &results0, __m128i &results1)
	Multiplies 8 int16_t values with 8 int16_t values and adds the products to 8 int32_t values. More...

template<>
__m128i	load_u8_10_upper_zero (const uint8_t *const buffer)

template<>
__m128i	load_u8_15_upper_zero (const uint8_t *const buffer)

template<>
__m128i	load_u8_13_lower_random (const uint8_t *const buffer)

template<>
__m128i	load_u8_15_lower_zero (const uint8_t *const buffer)

template<>
__m128i	load_u8_15_lower_random (const uint8_t *const buffer)

Static Private Member Functions
static unsigned int	interpolation2Channel16Bit1x1 (const uint8_t *const pixel, const unsigned int size, const unsigned int fx_y_, const unsigned int fxy_, const unsigned int fx_y, const unsigned int fxy)
	Returns the interpolated pixel values for one 2 channel 16 bit pixel. More...

Detailed Description

This class implements computer vision functions using SSE extensions.

Member Function Documentation

◆ addOffsetBeforeRightShiftDivisionByTwoSigned16Bit()

__m128i Ocean::CV::SSE::addOffsetBeforeRightShiftDivisionByTwoSigned16Bit ( const __m128i & value )

inlinestatic

Adds 1 to each signed 16 bit value which is both, negative and odd, so that each value can be right shifted by one bit to allow a correct division by two.

This function must be invoked before the right shift is applied.

Parameters

value The eight signed 16 bit values to be handled

Returns: The modified value for which divide (/ 2) and bit shift (>> 1) yield equal (and correct!) results

SSE does not have an intrinsic for integer division, so right bit shift is used instead. Unfortunately, for negative odd integer values v: (v / 2) != (v >> 1) because a right shift rounds towards negative infinity, e.g. -5 / 2 = -2 and -5 >> 1 = -3. As a work-around, an offset of 1 is added to all values that are both, negative and odd.

◆ addOffsetBeforeRightShiftDivisionByTwoSigned32Bit()

__m128i Ocean::CV::SSE::addOffsetBeforeRightShiftDivisionByTwoSigned32Bit ( const __m128i & value )

inlinestatic

Adds 1 to each signed 32 bit value which is both, negative and odd, so that each value can be right shifted by one bit to allow a correct division by two.

This function must be invoked before the right shift is applied.

Parameters

value The four signed 32 bit values to be handled

Returns: The modified value for which divide (/ 2) and bit shift (>> 1) yield equal (and correct!) results

SSE does not have an intrinsic for integer division, so right bit shift is used instead. Unfortunately, for negative odd integer values v: (v / 2) != (v >> 1) because a right shift rounds towards negative infinity, e.g. -5 / 2 = -2 and -5 >> 1 = -3. As a work-around, an offset of 1 is added to all values that are both, negative and odd.

◆ addOffsetBeforeRightShiftDivisionSigned16Bit()

__m128i Ocean::CV::SSE::addOffsetBeforeRightShiftDivisionSigned16Bit	(	const __m128i &	value,
		const unsigned int	rightShifts
	)

inlinestatic

Adds 2^shifts - 1 to each negative signed 16 bit value, so that each value can be right shifted to allow a correct division by 2^shifts.

This function must be invoked before the right shift is applied.

Parameters

value	The eight signed 16 bit values to be handled
rightShifts	The number of right shifts which needs to be applied, with range [0, 15]

Returns: The modified value for which division and shift yield equal (and correct!) results

◆ addOffsetBeforeRightShiftDivisionSigned32Bit()

__m128i Ocean::CV::SSE::addOffsetBeforeRightShiftDivisionSigned32Bit	(	const __m128i &	value,
		const unsigned int	rightShifts
	)

inlinestatic

Adds 2^shifts - 1 to each negative signed 32 bit value, so that each value can be right shifted to allow a correct division by 2^shifts.

This function must be invoked before the right shift is applied.

Parameters

value	The four signed 32 bit values to be handled
rightShifts	The number of right shifts which needs to be applied, with range [0, 31]

Returns: The modified value for which division and shift yield equal (and correct!) results

◆ average16Elements1Channel8Bit2x2()

void Ocean::CV::SSE::average16Elements1Channel8Bit2x2	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		uint8_t *const	result
	)

inlinestatic

Averages 16 elements of 2x2 blocks for 1 channel 8 bit frames.

The function takes two rows of 16 elements and returns 8 average elements (8 averaged pixels).

Parameters

image0	First row of 16 elements, must be valid
image1	Second row of 16 elements, must be valid
result	Resulting 8 average elements, must be valid

◆ average16Elements2Channel16Bit2x2()

void Ocean::CV::SSE::average16Elements2Channel16Bit2x2	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		uint8_t *const	result
	)

inlinestatic

Averages 16 elements of 2x2 blocks for 2 channel 16 bit frames.

The function takes two rows of 16 elements and returns 8 average elements (4 averaged pixels, each with 2 channels).

Parameters

image0	First row of 16 elements
image1	Second row of 16 elements
result	Resulting 8 average elements

◆ average16Elements4Channel32Bit2x2()

void Ocean::CV::SSE::average16Elements4Channel32Bit2x2	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		uint8_t *const	result
	)

inlinestatic

Averages 16 elements of 2x2 blocks for 4 channel 32 bit frames.

The function takes two rows of 16 elements and returns 8 average elements (2 averaged pixels, each with 4 channels).

Parameters

image0	First row of 16 elements
image1	Second row of 16 elements
result	Resulting 8 average elements

◆ average16ElementsBinary1Channel8Bit2x2()

void Ocean::CV::SSE::average16ElementsBinary1Channel8Bit2x2	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		uint8_t *const	result,
		const uint16_t	threshold = `776u`
	)

inlinestatic

Averages 16 elements of 2x2 blocks for 1 channel binary (0x00 or 0xFF) frames.

The function takes two rows of 16 elements and returns 8 average elements (8 averaged pixels).

Parameters

image0	First row of 16 elements, must be valid
image1	Second row of 16 elements, must be valid
result	Resulting 8 average elements, must be valid
threshold	The minimal sum value of four pixels to result in a mask with value 255, with range [1, 255 * 4]

◆ average24Elements3Channel24Bit2x2()

void Ocean::CV::SSE::average24Elements3Channel24Bit2x2	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		uint8_t *const	result
	)

inlinestatic

Averages 24 elements of 2x2 blocks for 3 channel 24 bit frames.

The function takes two rows of 24 elements and returns 12 average elements (4 averaged pixels, each with 3 channels).

Parameters

image0	First row of 24 elements
image1	Second row of 24 elements
result	Resulting 12 average elements

◆ average30Elements1Channel8Bit3x3()

void Ocean::CV::SSE::average30Elements1Channel8Bit3x3	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		const uint8_t *const	image2,
		uint8_t *const	result
	)

inlinestatic

Averages 30 elements of 3x3 blocks for 1 channel 8 bit frames.

The function takes three rows of 30 elements and returns 10 average elements (10 averaged pixels).

Parameters

image0	First row of 30 elements
image1	Second row of 30 elements
image2	Third row of 30 elements
result	Resulting 10 average elements

1/16 * | 1 2 1 |
       | 2 4 2 |
       | 1 2 1 |

◆ average32Elements1Channel8Bit2x2()

void Ocean::CV::SSE::average32Elements1Channel8Bit2x2	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		uint8_t *const	result
	)

inlinestatic

Averages 32 elements of 2x2 blocks for 1 channel 8 bit frames.

The function takes two rows of 32 elements and returns 16 average elements (16 averaged pixels).

Parameters

image0	First row of 32 elements
image1	Second row of 32 elements
result	Resulting 16 average elements

◆ average32Elements2Channel16Bit2x2()

void Ocean::CV::SSE::average32Elements2Channel16Bit2x2	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		uint8_t *const	result
	)

inlinestatic

Averages 32 elements of 2x2 blocks for 2 channel 16 bit frames.

The function takes two rows of 32 elements and returns 16 average elements (8 averaged pixels, each with 2 channels).

Parameters

image0	First row of 32 elements
image1	Second row of 32 elements
result	Resulting 16 average elements

◆ average32Elements4Channel32Bit2x2()

void Ocean::CV::SSE::average32Elements4Channel32Bit2x2	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		uint8_t *const	result
	)

inlinestatic

Averages 32 elements of 2x2 blocks for 4 channel 32 bit frames.

The function takes two rows of 32 elements and returns 16 average elements (4 averaged pixels, each with 4 channels).

Parameters

image0	First row of 32 elements
image1	Second row of 32 elements
result	Resulting 16 average elements

◆ average32ElementsBinary1Channel8Bit2x2()

void Ocean::CV::SSE::average32ElementsBinary1Channel8Bit2x2	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		uint8_t *const	result,
		const uint16_t	threshold = `776u`
	)

inlinestatic

Averages 32 elements of 2x2 blocks for 1 binary (0x00 or 0xFF) frames.

The function takes two rows of 32 elements and returns 16 average elements (16 averaged pixels).

Parameters

image0	First row of 32 elements, must be valid
image1	Second row of 32 elements, must be valid
result	Resulting 16 average elements, must be valid
threshold	The minimal sum value of four pixels to result in a mask with value 255, with range [1, 255 * 4]

◆ average6Elements3Channel96Bit2x2()

void Ocean::CV::SSE::average6Elements3Channel96Bit2x2	(	const float *const	image0,
		const float *const	image1,
		float *const	result
	)

inlinestatic

Averages 6 elements of 2x2 blocks for 3 channel 96 bit frames.

The function takes two rows of 6 elements and returns 3 average elements (1 averaged pixel with 3 channels).

Parameters

image0	First row of 6 elements
image1	Second row of 6 elements
result	Resulting 3 average elements

◆ average8Elements1Channel32Bit2x2()

void Ocean::CV::SSE::average8Elements1Channel32Bit2x2	(	const float *const	image0,
		const float *const	image1,
		float *const	result
	)

inlinestatic

Averages 8 elements of 2x2 blocks for 1 channel 32 bit frames.

The function takes two rows of 8 elements and returns 4 average elements (4 averaged pixels).

Parameters

image0	First row of 8 elements
image1	Second row of 8 elements
result	Resulting 4 average elements

◆ average8Elements1Channel8Bit2x2()

void Ocean::CV::SSE::average8Elements1Channel8Bit2x2	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		uint8_t *const	result
	)

inlinestatic

Averages 8 elements of 2x2 blocks for 1 channel 8 bit frames.

The function takes two rows of 8 elements and returns 4 average elements (4 averaged pixels).

Parameters

image0	First row of 8 elements
image1	Second row of 8 elements
result	Resulting 4 average elements

◆ average8Elements2Channel16Bit2x2()

void Ocean::CV::SSE::average8Elements2Channel16Bit2x2	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		uint8_t *const	result
	)

inlinestatic

Averages 8 elements of 2x2 blocks for 2 channel 16 bit frames.

The function takes two rows of 8 elements and returns 4 average elements (2 averaged pixels, each with 2 channels).

Parameters

image0	First row of 8 elements
image1	Second row of 8 elements
result	Resulting 4 average elements

◆ average8Elements2Channel64Bit2x2()

void Ocean::CV::SSE::average8Elements2Channel64Bit2x2	(	const float *const	image0,
		const float *const	image1,
		float *const	result
	)

inlinestatic

Averages 8 elements of 2x2 blocks for 2 channel 64 bit frames.

The function takes two rows of 8 elements and returns 4 average elements (2 averaged pixels).

Parameters

image0	First row of 8 elements
image1	Second row of 8 elements
result	Resulting 4 average elements

◆ average8Elements4Channel128Bit2x2()

void Ocean::CV::SSE::average8Elements4Channel128Bit2x2	(	const float *const	image0,
		const float *const	image1,
		float *const	result
	)

inlinestatic

Averages 8 elements of 2x2 blocks for 4 channel 128 bit frames.

The function takes two rows of 8 elements and returns 4 average elements (1 averaged pixel).

Parameters

image0	First row of 8 elements
image1	Second row of 8 elements
result	Resulting 4 average elements

◆ average8ElementsBinary1Channel8Bit2x2()

void Ocean::CV::SSE::average8ElementsBinary1Channel8Bit2x2	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		uint8_t *const	result,
		const uint16_t	threshold = `776u`
	)

inlinestatic

Averages 8 elements of 2x2 blocks for 1 channel binary (0x00 or 0xFF) frames.

The function takes two rows of 8 elements and returns 4 average elements (4 averaged pixels).

Parameters

image0	First row of 8 elements, must be valid
image1	Second row of 8 elements, must be valid
result	Resulting 4 average elements, must be valid
threshold	The minimal sum value of four pixels to result in a mask with value 255, with range [1, 255 * 4]

◆ bitMaskRemoveHigh16_8()

__m128i Ocean::CV::SSE::bitMaskRemoveHigh16_8 ( )

inlinestatic

Returns the following 128 bit mask: 0x00FF00FF-00FF00FF-00FF00FF-00FF00FF.

Returns: Bitmask

◆ bitMaskRemoveHigh32_16()

__m128i Ocean::CV::SSE::bitMaskRemoveHigh32_16 ( )

inlinestatic

Returns the following 128 bit mask: 0x0000FFFF-0000FFFF-0000FFFF-0000FFFF.

Returns: Bitmask

◆ deInterleave3Channel8Bit15Elements()

OCEAN_FORCE_INLINE void Ocean::CV::SSE::deInterleave3Channel8Bit15Elements	(	const __m128i &	interleaved,
		__m128i &	channel01,
		__m128i &	channel2
	)

static

Deinterleaves 15 elements of e.g., an image with 3 channels and 8 bit per element.

This function converts X CBA CBA CBA CBA CBA to 00000000000CCCCC 000BBBBB000AAAAA.

Parameters

interleaved	The 15 elements holding the interleaved image data
channel01	Resulting first and second channel elements, 5 elements of the first channel (in the lower 8 bytes), followed by 5 elements of the second channel (in the upper 8 bytes), the remaining bytes are zero
channel2	Resulting third channel elements, first 5 elements of the third channel, followed by zeros

◆ deInterleave3Channel8Bit24Elements()

OCEAN_FORCE_INLINE void Ocean::CV::SSE::deInterleave3Channel8Bit24Elements	(	const __m128i &	interleavedA,
		const __m128i &	interleavedB,
		__m128i &	channel01,
		__m128i &	channel2
	)

static

Deinterleaves 24 elements of e.g., an image with 3 channels and 8 bit per element.

This function converts XX XXX XXX CBA CBA CB A CBA CBA CBA CBA CBA to 00000000CCCCCCCC BBBBBBBBAAAAAAAA.

Parameters

interleavedA	First 16 elements holding the interleaved image data
interleavedB	Second 16 elements holding the interleaved image data, the first 8 elements will be used only
channel01	Resulting first and second channel elements, first 8 elements of the first channel, followed by 8 elements of the second channel
channel2	Resulting third channel elements, first 8 elements of the third channel, followed by zeros

◆ deInterleave3Channel8Bit45Elements()

void Ocean::CV::SSE::deInterleave3Channel8Bit45Elements	(	const uint8_t *	interleaved,
		__m128i &	channel0,
		__m128i &	channel1,
		__m128i &	channel2
	)

inlinestatic

Deinterleaves 45 elements of e.g., an image with 3 channels and 8 bit per element.

Parameters

interleaved	45 elements of an image with 3 channels and 8 bit per element (45 bytes), must be valid
channel0	Resulting first channel holding all elements corresponding to the first channel consecutively
channel1	Resulting second channel holding all elements corresponding to the second channel consecutively
channel2	Resulting third channel holding all elements corresponding to the third channel consecutively

◆ deInterleave3Channel8Bit48Elements() [1/3]

OCEAN_FORCE_INLINE void Ocean::CV::SSE::deInterleave3Channel8Bit48Elements	(	const __m128i &	interleavedA,
		const __m128i &	interleavedB,
		const __m128i &	interleavedC,
		__m128i &	channel0,
		__m128i &	channel1,
		__m128i &	channel2
	)

static

Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.

This function converts CBA CBA CBA CBA CBA C BA CBA CBA CBA CBA CB A CBA CBA CBA CBA CBA to CCCCCCCCCCCCCCCC BBBBBBBBBBBBBBBB AAAAAAAAAAAAAAAA.

Parameters

interleavedA	First 16 elements holding the interleaved image data
interleavedB	Second 16 elements holding the interleaved image data
interleavedC	Third 16 elements holding the interleaved image data
channel0	Resulting first channel holding all elements corresponding to the first channel consecutively
channel1	Resulting second channel holding all elements corresponding to the second channel consecutively
channel2	Resulting third channel holding all elements corresponding to the third channel consecutively

◆ deInterleave3Channel8Bit48Elements() [2/3]

void Ocean::CV::SSE::deInterleave3Channel8Bit48Elements	(	const uint8_t *	interleaved,
		__m128i &	channel0,
		__m128i &	channel1,
		__m128i &	channel2
	)

inlinestatic

Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.

Parameters

interleaved	48 elements of an image with 3 channels and 8 bit per element (48 bytes)
channel0	Resulting first channel holding all elements corresponding to the first channel consecutively
channel1	Resulting second channel holding all elements corresponding to the second channel consecutively
channel2	Resulting third channel holding all elements corresponding to the third channel consecutively

◆ deInterleave3Channel8Bit48Elements() [3/3]

void Ocean::CV::SSE::deInterleave3Channel8Bit48Elements	(	const uint8_t *	interleaved,
		uint8_t *	channel0,
		uint8_t *	channel1,
		uint8_t *	channel2
	)

inlinestatic

Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.

Parameters

interleaved	48 elements of an image with 3 channels and 8 bit per element (48 bytes), must be valid
channel0	Resulting first channel holding all elements corresponding to the first channel consecutively, must be valid
channel1	Resulting second channel holding all elements corresponding to the second channel consecutively, must be valid
channel2	Resulting third channel holding all elements corresponding to the third channel consecutively, must be valid

◆ divideByRightShiftSigned16Bit()

__m128i Ocean::CV::SSE::divideByRightShiftSigned16Bit	(	const __m128i &	value,
		const unsigned int	rightShifts
	)

inlinestatic

Divides eight signed 16 bit values by applying a right shift.

This is able to determine the correct division result for positive and negative 16 bit values.

Parameters

value	The eight signed 16 bit values to be handled
rightShifts	The number of right shifts which needs to be applied, with range [0, 15]

Returns: The divided values

◆ divideByRightShiftSigned32Bit()

__m128i Ocean::CV::SSE::divideByRightShiftSigned32Bit	(	const __m128i &	value,
		const unsigned int	rightShifts
	)

inlinestatic

Divides four signed 32 bit values by applying a right shift.

This is able to determine the correct division result for positive and negative 32 bit values.

Parameters

value	The four signed 32 bit values to be handled
rightShifts	The number of right shifts which needs to be applied, with range [0, 31]

Returns: The divided values

◆ gradientHorizontalVertical8Elements1Channel8Bit()

void Ocean::CV::SSE::gradientHorizontalVertical8Elements1Channel8Bit	(	const uint8_t *	source,
		int8_t *	response,
		const unsigned int	width
	)

inlinestatic

Determines the horizontal and the vertical gradients for 8 following pixels for a given 1 channel 8 bit frame.

The resulting gradients are interleaved and each response is inside the range [-127, 127] as the standard response is divided by two.

Parameters

source	The source position of the first pixel to determine the gradient for, this pixel must not be a border pixel in the original frame
response	Resulting gradient responses, first the horizontal response then the vertical response (zipped) for 8 pixels
width	The width of the original frame in pixel, with range [10, infinity)

◆ gradientHorizontalVertical8Elements3Products1Channel8Bit()

void Ocean::CV::SSE::gradientHorizontalVertical8Elements3Products1Channel8Bit	(	const uint8_t *	source,
		int16_t *	response,
		const unsigned int	width
	)

inlinestatic

Determines the squared horizontal and vertical gradients and the product of both gradients for 8 following pixels for a given 1 channel 8 bit frame.

The resulting gradients are interleaved and each response is inside the range [-(127 * 127), 127 * 127] as the standard response is divided by two.

Parameters

source	The source position of the first pixel to determine the gradient for, this pixel must not be a border pixel in the original frame
response	Resulting gradient responses, first the horizontal response then the vertical response and afterwards the product of horizontal and vertical response (zipped) for 8 pixels
width	The width of the original frame in pixel, with range [10, infinity)

◆ interleave3Channel8Bit48Elements() [1/2]

OCEAN_FORCE_INLINE void Ocean::CV::SSE::interleave3Channel8Bit48Elements	(	const __m128i &	channel0,
		const __m128i &	channel1,
		const __m128i &	channel2,
		__m128i &	interleavedA,
		__m128i &	interleavedB,
		__m128i &	interleavedC
	)

static

Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.

This function converts CCCCCCCCCCCCCCCC BBBBBBBBBBBBBBBB AAAAAAAAAAAAAAAA to CBA CBA CBA CBA CBA C BA CBA CBA CBA CBA CB A CBA CBA CBA CBA CBA.

Parameters

channel0	The 16 elements of the first channel to be interleaved
channel1	The 16 elements of the second channel to be interleaved
channel2	The 16 elements of the third channel to be interleaved
interleavedA	Resulting first 16 of the interleaved data
interleavedB	Resulting second 16 of the interleaved data
interleavedC	Resulting third 16 of the interleaved data

◆ interleave3Channel8Bit48Elements() [2/2]

OCEAN_FORCE_INLINE void Ocean::CV::SSE::interleave3Channel8Bit48Elements	(	const uint8_t *const	channel0,
		const uint8_t *const	channel1,
		const uint8_t *const	channel2,
		uint8_t *const	interleaved
	)

static

Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.

Parameters

channel0	The 16 elements of the first channel to be interleaved, must be valid
channel1	The 16 elements of the second channel to be interleaved, must be valid
channel2	The 16 elements of the third channel to be interleaved, must be valid
interleaved	The resulting 48 interleaved elements, must be valid

◆ interpolation1Channel8Bit15Elements()

__m128i Ocean::CV::SSE::interpolation1Channel8Bit15Elements	(	const __m128i &	values0,
		const __m128i &	values1,
		const __m128i &	fx_fy_fxfy_,
		const __m128i &	fx_fyfxfy
	)

inlinestatic

Interpolates 15 elements of 2x2 blocks for 1 channel 8 bit frames.

The interpolation is specified by tx and ty with range [0, 128u].

Parameters

values0	First row of 16 elements to be interpolated
values1	Second row of 16 elements to be interpolated
fx_fy_fxfy_	In each unsigned 16 bit element: ((128u - tx) * (128u - ty)) \| (tx * (128u - ty)) << 16
fx_fyfxfy	In each unsigned 16 bit element: (128u - tx) * ty \| (tx * ty) << 16

Returns: Interpolation result for 15 elements, which are (15 pixels)

◆ interpolation1Channel8Bit8Elements()

__m128i Ocean::CV::SSE::interpolation1Channel8Bit8Elements	(	const __m128i &	values0,
		const __m128i &	values1,
		const __m128i &	fx_fy_,
		const __m128i &	fxfy_,
		const __m128i &	fx_fy,
		const __m128i &	fxfy
	)

inlinestatic

Interpolates 8 elements of 2x2 blocks for 1 channel 8 bit frames.

The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
...
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].

Parameters

values0	First row of 9 elements to be interpolated
values1	Second row of 9 elements to be interpolated
fx_fy_	In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty)
fxfy_	In each unsigned 16 bit element: Product of (tx) and (128u - ty)
fx_fy	In each unsigned 16 bit element: Product of (128u - tx) and (ty)
fxfy	In each unsigned 16 bit element: Product of (tx) and (ty)

Returns: Interpolation result for 8 elements, which are 8 pixels

◆ interpolation2Channel16Bit1x1()

unsigned int Ocean::CV::SSE::interpolation2Channel16Bit1x1	(	const uint8_t *const	pixel,
		const unsigned int	size,
		const unsigned int	fx_y_,
		const unsigned int	fxy_,
		const unsigned int	fx_y,
		const unsigned int	fxy
	)

inlinestaticprivate

Returns the interpolated pixel values for one 2 channel 16 bit pixel.

Parameters

pixel	Upper left pixel in the frame
size	Size of one frame row in bytes
fx_y_	Product of the inverse fx and the inverse fy interpolation factor
fxy_	Product of the fx and the inverse fy interpolation factor
fx_y	Product of the inverse fx and the fy interpolation factor
fxy	Product of the fx and the fy interpolation factor

Returns: Interpolated pixel values

◆ interpolation2Channel16Bit8Elements()

__m128i Ocean::CV::SSE::interpolation2Channel16Bit8Elements	(	const __m128i &	values0,
		const __m128i &	values1,
		const __m128i &	fx_fy_,
		const __m128i &	fxfy_,
		const __m128i &	fx_fy,
		const __m128i &	fxfy
	)

inlinestatic

Interpolates 8 elements of 2x2 blocks for 2 channel 16 bit frames.

The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
...
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].

Parameters

values0	First row of 10 elements to be interpolated
values1	Second row of 10 elements to be interpolated
fx_fy_	In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty)
fxfy_	In each unsigned 16 bit element: Product of (tx) and (128u - ty)
fx_fy	In each unsigned 16 bit element: Product of (128u - tx) and (ty)
fxfy	In each unsigned 16 bit element: Product of (tx) and (ty)

Returns: Interpolation result for 8 elements, which are 4 pixels

◆ interpolation3Channel24Bit12Elements()

__m128i Ocean::CV::SSE::interpolation3Channel24Bit12Elements	(	const __m128i &	values0,
		const __m128i &	values1,
		const __m128i &	fx_fy_fxfy_,
		const __m128i &	fx_fyfxfy
	)

inlinestatic

Interpolates 12 elements of 2x2 blocks for 3 channel 24 bit frames.

The interpolation is specified by tx and ty with range [0, 128u].

Parameters

values0	First row of 15 elements to be interpolated
values1	Second row of 15 elements to be interpolated
fx_fy_fxfy_	In each unsigned 16 bit element: ((128u - tx) * (128u - ty)) \| (tx * (128u - ty)) << 16
fx_fyfxfy	In each unsigned 16 bit element: (128u - tx) * ty \| (tx * ty) << 16

Returns: Interpolation result for 12 elements, which are (4 pixels)

◆ interpolation3Channel24Bit8Elements()

__m128i Ocean::CV::SSE::interpolation3Channel24Bit8Elements	(	const __m128i &	values0,
		const __m128i &	values1,
		const __m128i &	fx_fy_,
		const __m128i &	fxfy_,
		const __m128i &	fx_fy,
		const __m128i &	fxfy
	)

inlinestatic

Interpolates 8 elements of 2x2 blocks for 3 channel 24 bit frames.

The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
...
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].

Parameters

values0	First row of 11 elements to be interpolated
values1	Second row of 11 elements to be interpolated
fx_fy_	In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty)
fxfy_	In each unsigned 16 bit element: Product of (tx) and (128u - ty)
fx_fy	In each unsigned 16 bit element: Product of (128u - tx) and (ty)
fxfy	In each unsigned 16 bit element: Product of (tx) and (ty)

Returns: Interpolation result for 8 elements, which are (2 2/3 pixels)

◆ interpolation4Channel32Bit2x4Elements()

__m128i Ocean::CV::SSE::interpolation4Channel32Bit2x4Elements	(	const __m128i &	values0,
		const __m128i &	values1,
		const __m128i &	fx_fy_,
		const __m128i &	fxfy_,
		const __m128i &	fx_fy,
		const __m128i &	fxfy
	)

inlinestatic

Interpolates 2x4 elements (two separated blocks of 4 elements) of 2x2 blocks for 4 channel 32 bit frames.

The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
...
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].

Parameters

values0	First row of 16 elements to be interpolated
values1	Second row of 16 elements to be interpolated
fx_fy_	In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty)
fxfy_	In each unsigned 16 bit element: Product of (tx) and (128u - ty)
fx_fy	In each unsigned 16 bit element: Product of (128u - tx) and (ty)
fxfy	In each unsigned 16 bit element: Product of (tx) and (ty)

Returns: Interpolation result for 8 elements, which are (2 pixels)

◆ interpolation4Channel32Bit8Elements()

__m128i Ocean::CV::SSE::interpolation4Channel32Bit8Elements	(	const __m128i &	values0,
		const __m128i &	values1,
		const __m128i &	fx_fy_,
		const __m128i &	fxfy_,
		const __m128i &	fx_fy,
		const __m128i &	fxfy
	)

inlinestatic

Interpolates 8 elements of 2x2 blocks for 4 channel 32 bit frames.

The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
...
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].

Parameters

values0	First row of 12 elements to be interpolated
values1	Second row of 12 elements to be interpolated
fx_fy_	In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty)
fxfy_	In each unsigned 16 bit element: Product of (tx) and (128u - ty)
fx_fy	In each unsigned 16 bit element: Product of (128u - tx) and (ty)
fxfy	In each unsigned 16 bit element: Product of (tx) and (ty)

Returns: Interpolation result for 8 elements, which are (2 pixels)

◆ load128i()

__m128i Ocean::CV::SSE::load128i ( const void *const buffer )

inlinestatic

Loads a 128i value from the memory.

Parameters

buffer Buffer to be loaded (does not need to be aligned on any particular boundary), ensure that the buffer has a size of at least 16 bytes

Returns: Resulting value

◆ load128iLower64()

__m128i Ocean::CV::SSE::load128iLower64 ( const void *const buffer )

inlinestatic

Loads the lower 64 bit of a 128i value from the memory.

The upper 64 bit are zeroed.

Parameters

buffer Buffer to be loaded (does not need to be aligned on any particular boundary), ensure that the buffer has a size of at least 8 bytes

Returns: Resulting value

◆ load_u8_10_upper_zero() [1/2]

template<bool tBufferHas16Bytes>

__m128i Ocean::CV::SSE::load_u8_10_upper_zero ( const uint8_t *const buffer )

inlinestatic

Loads 10 bytes from memory, which holds either at least 16 bytes or exactly 10 bytes, to a 128i value and sets the remaining bytes of the resulting 128i value to zero.

The loaded memory will be stored in the upper 10 bytes of the 128i value while the lowest remaining 6 bytes will be set to zero. Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [09 08 07 06 05 04 03 02 01 00 ZZ ZZ ZZ ZZ ZZ ZZ], with ZZ meaning zero.

Parameters

buffer Buffer to be loaded (does not need to be aligned on any particular boundary)

Returns: Resulting 128 bit value

Template Parameters

tBufferHas16Bytes True, if the buffer holds at least 16 bytes; False, if the buffer holds only 10 bytes

◆ load_u8_10_upper_zero() [2/2]

template<>

__m128i Ocean::CV::SSE::load_u8_10_upper_zero ( const uint8_t *const buffer )

inlinestatic

◆ load_u8_13_lower_random() [1/2]

template<bool tBufferHas16Bytes>

__m128i Ocean::CV::SSE::load_u8_13_lower_random ( const uint8_t *const buffer )

inlinestatic

Loads 13 bytes from memory, which holds either at least 16 bytes or exactly 13 bytes, to a 128i value while the remaining bytes of the resulting 128i value will be random.

The loaded memory will be stored in the lower 13 bytes of the 128i value while the highest remaining 3 bytes will be random.
Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [?? ?? ?? 12 11 10 09 08 07 06 05 04 03 02 01 00], with ?? meaning a random value.

Parameters

buffer Buffer to be loaded (does not need to be aligned on any particular boundary)

Returns: Resulting 128 bit value

Template Parameters

tBufferHas16Bytes True, if the buffer holds at least 16 bytes; False, if the buffer holds only 13 bytes

◆ load_u8_13_lower_random() [2/2]

template<>

__m128i Ocean::CV::SSE::load_u8_13_lower_random ( const uint8_t *const buffer )

inlinestatic

◆ load_u8_15_lower_random() [1/2]

template<bool tBufferHas16Bytes>

__m128i Ocean::CV::SSE::load_u8_15_lower_random ( const uint8_t *const buffer )

inlinestatic

Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value while the remaining byte of the resulting 128i value will be random.

The loaded memory will be stored in the lower 15 bytes of the 128i value while the highest remaining 1 byte will be random.
Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [?? 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00], with ?? meaning a random value.

Parameters

buffer Buffer to be loaded (does not need to be aligned on any particular boundary)

Returns: Resulting 128 bit value

Template Parameters

tBufferHas16Bytes True, if the buffer holds at least 16 bytes; False, if the buffer holds only 15 bytes

◆ load_u8_15_lower_random() [2/2]

template<>

__m128i Ocean::CV::SSE::load_u8_15_lower_random ( const uint8_t *const buffer )

inlinestatic

◆ load_u8_15_lower_zero() [1/2]

template<bool tBufferHas16Bytes>

__m128i Ocean::CV::SSE::load_u8_15_lower_zero ( const uint8_t *const buffer )

inlinestatic

Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value and sets the remaining byte of the resulting 128i value to zero.

The loaded memory will be stored in the lower 15 bytes of the 128i value while the highest remaining 1 byte will be set to zero.
Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [ZZ 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00], with ZZ meaning zero.

Parameters

buffer Buffer to be loaded (does not need to be aligned on any particular boundary)

Returns: Resulting 128 bit value

Template Parameters

tBufferHas16Bytes True, if the buffer holds at least 16 bytes; False, if the buffer holds only 15 bytes

◆ load_u8_15_lower_zero() [2/2]

template<>

__m128i Ocean::CV::SSE::load_u8_15_lower_zero ( const uint8_t *const buffer )

inlinestatic

◆ load_u8_15_upper_zero() [1/2]

template<bool tBufferHas16Bytes>

__m128i Ocean::CV::SSE::load_u8_15_upper_zero ( const uint8_t *const buffer )

inlinestatic

Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value and sets the remaining byte of the resulting 128i value to zero.

The loaded memory will be stored in the upper 15 bytes of the 128i value while the lowest remaining 1 byte will be set to zero. Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 ZZ], with ZZ meaning zero.

Parameters

buffer Buffer to be loaded (does not need to be aligned on any particular boundary)

Returns: Resulting 128 bit value

Template Parameters

tBufferHas16Bytes True, if the buffer holds at least 16 bytes; False, if the buffer holds only 15 bytes

◆ load_u8_15_upper_zero() [2/2]

template<>

__m128i Ocean::CV::SSE::load_u8_15_upper_zero ( const uint8_t *const buffer )

inlinestatic

◆ load_u8_16_and_shift_right()

template<unsigned int tShiftBytes>

__m128i Ocean::CV::SSE::load_u8_16_and_shift_right ( const uint8_t *const buffer )

inlinestatic

Loads 16 bytes from memory which is at least 16 bytes large and shifts the 128i value by a specified number of bytes to the right (by inserting zeros).

This function can be used if the remaining buffer is smaller than 16 bytes while the buffer exceeds/continues in the lower address space (from the original point of interest).
Thus, this function can handle a buffer with the following pattern (with lower address left and high address right):
| ?? ?? ?? ?? ?? ?? ?? ?? ?? V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 |, where ?? represent random values in our buffer (in the lower address space), and VX represent the values of interest and V0 the location to which 'buffer' is pointing to,
by calling load_u8_16_and_shift_right<6>(buffer - 6);
The resulting 128i register will then be composed of (high bits left, low bits right): [00 00 00 00 00 00 V9 V8 V7 V6 V5 V4 V3 V2 V1 V0].

Parameters

buffer The actual address from which the 16 bytes will be loaded, must be valid and must be at least 16 bytes large

Returns: The resulting 128 bit value

Template Parameters

tShiftBytes The number of bytes which will be shifted (to the right) after the memory has been loaded, with range [0, 16]

◆ moveHighBits16_8()

__m128i Ocean::CV::SSE::moveHighBits16_8 ( const __m128i & value )

inlinestatic

Moves the higher 8 bits of eight 16 bit elements to the lower 8 bits and fills the high bits with 0.

Given: PONM-LKJI-HGFE-DCBA
Result: 0P0N-0L0J-0H0F-0D0B

Parameters

value Value to remove the high bits for

Returns: Result

◆ moveHighBits16_8_5()

__m128i Ocean::CV::SSE::moveHighBits16_8_5 ( const __m128i & value )

inlinestatic

Moves the higher 8 bits of five 16 bit elements to the lower 8 bits and fills the high bits with 0.

Given: PONM-LKJI-HGFE-DCBA
Result: 0000-000J-0H0F-0D0B

Parameters

value Value to remove the high bits for

Returns: Result

◆ moveHighBits16_8_6()

__m128i Ocean::CV::SSE::moveHighBits16_8_6 ( const __m128i & value )

inlinestatic

Moves the higher 8 bits of six 16 bit elements to the lower 8 bits and fills the high bits with 0.

Given: PONM-LKJI-HGFE-DCBA
Result: 0000-0L0J-0H0F-0D0B

Parameters

value Value to remove the high bits for

Returns: Result

◆ moveHighBits16_8_7()

__m128i Ocean::CV::SSE::moveHighBits16_8_7 ( const __m128i & value )

inlinestatic

Moves the higher 8 bits of seven 16 bit elements to the lower 8 bits and fills the high bits with 0.

Given: PONM-LKJI-HGFE-DCBA
Result: 000N-0L0J-0H0F-0D0B

Parameters

value Value to remove the high bits for

Returns: Result

◆ moveHighBits32_16()

__m128i Ocean::CV::SSE::moveHighBits32_16 ( const __m128i & value )

inlinestatic

Moves the higher 16 bits of four 32 bit elements to the lower 16 bits and fills the high bits with 0.

Given: PONM-LKJI-HGFE-DCBA
Result: 00PO-00LK-00HG-00DC

Parameters

value Value to remove the high bits for

Returns: Result

◆ moveLowBits16_8ToHigh64()

__m128i Ocean::CV::SSE::moveLowBits16_8ToHigh64 ( const __m128i & value )

inlinestatic

Moves the lower 8 bits of eight 16 bit elements to the higher 64 bits and fills the low 64 bits with 0.

Given: PONM-LKJI-HGFE-DCBA
Result: OMKI-GECA-0000-0000

Parameters

value Value to remove the high bits for

Returns: Result

◆ moveLowBits16_8ToLow64()

__m128i Ocean::CV::SSE::moveLowBits16_8ToLow64 ( const __m128i & value )

inlinestatic

Moves the lower 8 bits of eight 16 bit elements to the lower 64 bits and fills the high 64 bits with 0.

Given: PONM-LKJI-HGFE-DCBA
Result: 0000-0000-OMKI-GECA

Parameters

value Value to remove the high bits for

Returns: Result

◆ moveLowBits32_16ToLow64()

__m128i Ocean::CV::SSE::moveLowBits32_16ToLow64 ( const __m128i & value )

inlinestatic

Moves the lower 16 bits of four 32 bit elements to the lower 64 bits and fills the high 64 bits with 0.

Given: PONM-LKJI-HGFE-DCBA
Result: 0000-0000-NMJI-FEBA

Parameters

value Value to remove the high bits for

Returns: Result

◆ moveLowBits32_8ToLow32()

__m128i Ocean::CV::SSE::moveLowBits32_8ToLow32 ( const __m128i & value )

inlinestatic

Moves the lower 8 bits of four 32 bit elements to the lower 32 bits and fills the high 96 bits with 0.

Given: PONM-LKJI-HGFE-DCBA
Result: 0000-0000-0000-MIEA

Parameters

value Value to remove the high bits for

Returns: Result

◆ multiplyInt8x16ToInt32x8()

OCEAN_FORCE_INLINE void Ocean::CV::SSE::multiplyInt8x16ToInt32x8	(	const __m128i &	values0,
		const __m128i &	values1,
		__m128i &	products0,
		__m128i &	products1
	)

static

Multiplies 8 int16_t values with 8 int16_t values and returns the products as 8 int32_t results.

The pseudo code of the function is as follows:

products0[0] = values0[0] * values1[0]
...
products0[3] = values0[3] * values1[3]

products1[0] = values0[4] * values1[4]
...
products1[3] = values0[7] * values1[7]

Parameters

values0	The first 8 int16_t values to be multiplied
values1	The second 8 int16_t values to be multiplied
products0	The resulting first 4 int32_t products
products1	The resulting second 4 int32_t products

◆ multiplyInt8x16ToInt32x8AndAccumulate()

OCEAN_FORCE_INLINE void Ocean::CV::SSE::multiplyInt8x16ToInt32x8AndAccumulate	(	const __m128i &	values0,
		const __m128i &	values1,
		__m128i &	results0,
		__m128i &	results1
	)

static

Multiplies 8 int16_t values with 8 int16_t values and adds the products to 8 int32_t values.

The pseudo code of the function is as follows:

results0[0] += values0[0] * values1[0]
...
results0[3] += values0[3] * values1[3]

results1[0] += values0[4] * values1[4]
...
results1[3] += values0[7] * values1[7]

Parameters

values0	The first 8 int16_t values to be multiplied
values1	The second 8 int16_t values to be multiplied
results0	The results to which the first 4 int32_t products will be added
results1	The results to which the second 4 int32_t products will be added

◆ prefetchNTA()

void Ocean::CV::SSE::prefetchNTA ( const void *const data )

inlinestatic

Prefetches a block of non-temporal memory into non-temporal cache structure.

Parameters

data	Data to be prefetched

◆ prefetchT0()

void Ocean::CV::SSE::prefetchT0 ( const void *const data )

inlinestatic

Prefetches a block of temporal memory into all cache levels.

Parameters

data	Data to be prefetched

◆ prefetchT1()

void Ocean::CV::SSE::prefetchT1 ( const void *const data )

inlinestatic

Prefetches a block of temporal memory in all cache levels except 0th cache level.

Parameters

data	Data to be prefetched

◆ prefetchT2()

void Ocean::CV::SSE::prefetchT2 ( const void *const data )

inlinestatic

Prefetches a block of temporal memory in all cache levels, except 0th and 1st cache levels.

Parameters

data	Data to be prefetched

◆ removeHighBits16_8()

__m128i Ocean::CV::SSE::removeHighBits16_8 ( const __m128i & value )

inlinestatic

Removes the higher 8 bits of eight 16 bit elements.

Given: PONM-LKJI-HGFE-DCBA
Result: 0O0M-0K0I-0G0E-0C0A

Parameters

value Value to remove the high bits for

Returns: Result

◆ removeHighBits16_8_7_lower()

__m128i Ocean::CV::SSE::removeHighBits16_8_7_lower ( const __m128i & value )

inlinestatic

Removes the higher 8 bits of eight 16 bit elements and sets the upper two bytes to zero.

Given: PONM-LKJI-HGFE-DCBA
Result: 000M-0K0I-0G0E-0C0A

Parameters

value Value to remove the high bits for

Returns: Result

◆ removeHighBits16_8_7_upper()

__m128i Ocean::CV::SSE::removeHighBits16_8_7_upper ( const __m128i & value )

inlinestatic

Removes the higher 8 bits of eight 16 bit elements and sets the lower two bytes to zero.

Given: PONM-LKJI-HGFE-DCBA
Result: 0O0M-0K0I-0G0E-0C00

Parameters

value Value to remove the high bits for

Returns: Result

◆ removeHighBits32_16()

__m128i Ocean::CV::SSE::removeHighBits32_16 ( const __m128i & value )

inlinestatic

Removes the higher 16 bits of four 32 bit elements.

Given: PONM-LKJI-HGFE-DCBA
Result: 00NM-00JI-00FE-00BA

Parameters

value Value to remove the high bits for

Returns: Result

◆ removeLowBits32_16()

__m128i Ocean::CV::SSE::removeLowBits32_16 ( const __m128i & value )

inlinestatic

Removes the lower 16 bits of four 32 bit elements.

Given: PONM-LKJI-HGFE-DCBA
Result: PO00-LK00-HG00-DC00

Parameters

value Value to remove the lower bits for

Returns: Result

◆ reverseChannelOrder2Channel8Bit32Elements()

OCEAN_FORCE_INLINE void Ocean::CV::SSE::reverseChannelOrder2Channel8Bit32Elements	(	const uint8_t *	interleaved,
		uint8_t *	reversedInterleaved
	)

static

Reverses the order of the channels of 16 pixels (32 elements) of an image with 2 interleaved channels and 8 bit per element (e.g., YA16 to AY16).

Parameters

interleaved	32 elements (16 pixels) of an image with 2 channels and 8 bit per element (32 bytes)
reversedInterleaved	Resulting 32 elements with reversed channel order

◆ reverseChannelOrder3Channel8Bit48Elements() [1/3]

OCEAN_FORCE_INLINE void Ocean::CV::SSE::reverseChannelOrder3Channel8Bit48Elements	(	const __m128i &	interleaved0,
		const __m128i &	interleaved1,
		const __m128i &	interleaved2,
		__m128i &	reversedInterleaved0,
		__m128i &	reversedInterleaved1,
		__m128i &	reversedInterleaved2
	)

static

Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channels and 8 bit per element.

Parameters

interleaved0	First 16 elements holding the interleaved image data
interleaved1	Second 16 elements holding the interleaved image data
interleaved2	Third 16 elements holding the interleaved image data
reversedInterleaved0	Resulting first 16 elements holding the interleaved image data with reversed channel order
reversedInterleaved1	Resulting second 16 elements holding the interleaved image data with reversed channel order
reversedInterleaved2	Resulting third 16 elements holding the interleaved image data with reversed channel order

◆ reverseChannelOrder3Channel8Bit48Elements() [2/3]

OCEAN_FORCE_INLINE void Ocean::CV::SSE::reverseChannelOrder3Channel8Bit48Elements	(	const uint8_t *	interleaved,
		uint8_t *	reversedInterleaved
	)

static

Reverses the order of the first and last channel of 48 elements (16 pixels) of an image with 3 interleaved channels and 8 bit per element (e.g., RGB24 to BGR24).

Parameters

interleaved	48 elements of an image with 3 channels and 8 bit per element (48 bytes)
reversedInterleaved	Resulting 48 elements with reversed channel order

◆ reverseChannelOrder3Channel8Bit48Elements() [3/3]

void Ocean::CV::SSE::reverseChannelOrder3Channel8Bit48Elements ( uint8_t * interleaved )

inlinestatic

Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channels and 8 bit per element (in place).

Parameters

interleaved 48 elements of an image with 3 channels and 8 bit per element (48 bytes)

◆ reverseChannelOrder4Channel8Bit64Elements()

OCEAN_FORCE_INLINE void Ocean::CV::SSE::reverseChannelOrder4Channel8Bit64Elements	(	const uint8_t *	interleaved,
		uint8_t *	reversedInterleaved
	)

static

Reverses the order of the channels of 16 pixels (64 elements) of an image with 4 interleaved channels and 8 bit per element (e.g., RGBA32 to ABGR32).

Parameters

interleaved	64 elements of an image with 4 channels and 8 bit per element (64 bytes)
reversedInterleaved	Resulting 64 elements with reversed channel order

◆ reverseElements8Bit48Elements() [1/3]

void Ocean::CV::SSE::reverseElements8Bit48Elements	(	const __m128i &	elements0,
		const __m128i &	elements1,
		const __m128i &	elements2,
		__m128i &	reversedElements0,
		__m128i &	reversedElements1,
		__m128i &	reversedElements2
	)

inlinestatic

Reverses the order of 48 elements with 8 bit per element.

Parameters

elements0	First 16 elements
elements1	Second 16 elements
elements2	Third 16 elements
reversedElements0	Resulting reversed first 16 elements
reversedElements1	Resulting reversed second 16 elements
reversedElements2	Resulting reversed third 16 elements

◆ reverseElements8Bit48Elements() [2/3]

void Ocean::CV::SSE::reverseElements8Bit48Elements	(	const uint8_t *	elements,
		uint8_t *	reversedElements
	)

inlinestatic

Reverses the order of 48 elements with 8 bit per element.

Parameters

elements	48 elements that will be reversed
reversedElements	Resulting reversed 48 elements

◆ reverseElements8Bit48Elements() [3/3]

void Ocean::CV::SSE::reverseElements8Bit48Elements ( uint8_t * elements )

inlinestatic

Reverses the order of 48 elements with 8 bit per element (in place).

Parameters

elements 48 elements that will be reversed

◆ set128i()

__m128i Ocean::CV::SSE::set128i	(	const unsigned long long	high64,
		const unsigned long long	low64
	)

inlinestatic

Sets a 128i value by two 64 bit values.

Parameters

high64	High 64 bits to be set
low64	Low 64 bits to be set

Returns: Resulting 128i value

◆ shiftAndMirrorChannelToBack4Channel32Bit()

void Ocean::CV::SSE::shiftAndMirrorChannelToBack4Channel32Bit	(	const uint8_t *	elements,
		uint8_t *	shiftedElements
	)

inlinestatic

Shifts the channels of a 4 channel 32 bit pixels to the back and moves the back channel to the front channel and mirrors the four individual pixels.

Parameters

elements	16 elements of 4 pixels to be shifted and mirrored
shiftedElements	Resulting shifted and mirrored elements

◆ shiftAndMirrorChannelToFront4Channel32Bit()

void Ocean::CV::SSE::shiftAndMirrorChannelToFront4Channel32Bit	(	const uint8_t *	elements,
		uint8_t *	shiftedElements
	)

inlinestatic

Shifts the channels of a 4 channel 32 bit pixels to the front and moves the front channel to the back channel and mirrors the four individual pixels.

Parameters

elements	16 elements of 4 pixels to be shifted and mirrored
shiftedElements	Resulting shifted and mirrored elements

◆ shiftChannelToBack4Channel32Bit()

void Ocean::CV::SSE::shiftChannelToBack4Channel32Bit	(	const uint8_t *	elements,
		uint8_t *	shiftedElements
	)

inlinestatic

Shifts the channels of a 4 channel 32 bit pixels to the back and moves the back channel to the front channel.

The function takes four pixels DCBA DCBA DCBA DCBA and provides CBAD CBAD CBAD CBAD.

Parameters

elements	16 elements of 4 pixels to be shifted
shiftedElements	Resulting shifted elements

◆ shiftChannelToFront4Channel32Bit()

void Ocean::CV::SSE::shiftChannelToFront4Channel32Bit	(	const uint8_t *	elements,
		uint8_t *	shiftedElements
	)

inlinestatic

Shifts the channels of a 4 channel 32 bit pixels to the front and moves the front channel to the back channel.

The function takes four pixels DCBA DCBA DCBA DCBA and provides ADCB ADCB ADCB ADCB.

Parameters

elements	16 elements of 4 pixels to be shifted
shiftedElements	Resulting shifted elements

◆ shuffleLow32ToLow32_8()

__m128i Ocean::CV::SSE::shuffleLow32ToLow32_8 ( const __m128i & value )

inlinestatic

Shuffles the lower four 8 bits to the low 8 bits of four 32 bit elements.

Given: PONM-LKJI-HGFE-DCBA
Result: 000D-000C-000B-000A

Parameters

value Value to be shuffled

Returns: Result

◆ shuffleNeighbor2High64BitsToLow16_8()

__m128i Ocean::CV::SSE::shuffleNeighbor2High64BitsToLow16_8 ( const __m128i & value )

inlinestatic

Shuffles pairs of two neighbors of the high 64 bits to the low 8 bits of eight 16 bit elements.

Parameters

value Value to be shuffled

Returns: Result

◆ shuffleNeighbor2Low64BitsToLow16_8()

__m128i Ocean::CV::SSE::shuffleNeighbor2Low64BitsToLow16_8 ( const __m128i & value )

inlinestatic

Shuffles pairs of two neighbors of the low 64 bits to the low 8 bits of eight 16 bit elements.

Parameters

value Value to be shuffled

Returns: Result

◆ shuffleNeighbor4High64BitsToLow16_8()

__m128i Ocean::CV::SSE::shuffleNeighbor4High64BitsToLow16_8 ( const __m128i & value )

inlinestatic

Shuffles pairs of four neighbors of the high 64 bits to the low 8 bits of eight 16 bit elements.

Given: PONM-LKJI-HGFE-DCBA
Result: 0P0L-0O0K-0N0J-0M0I

Parameters

value Value to be shuffled

Returns: Result

◆ shuffleNeighbor4Low64BitsToLow16_8()

__m128i Ocean::CV::SSE::shuffleNeighbor4Low64BitsToLow16_8 ( const __m128i & value )

inlinestatic

Shuffles pairs of four neighbors of the low 64 bits to the low 8 bits of eight 16 bit elements.

Given: PONM-LKJI-HGFE-DCBA
Result: 0H0D-0G0C-0F0B-0E0A

Parameters

value Value to be shuffled

Returns: Result

◆ ssd2Channel16Bit1x1() [1/2]

unsigned int Ocean::CV::SSE::ssd2Channel16Bit1x1	(	const uint8_t *const	pixel0,
		const uint8_t *const	pixel1,
		const unsigned int	size0,
		const unsigned int	size1,
		const unsigned int	f0x_y_,
		const unsigned int	f0xy_,
		const unsigned int	f0x_y,
		const unsigned int	f0xy,
		const unsigned int	f1x_y_,
		const unsigned int	f1xy_,
		const unsigned int	f1x_y,
		const unsigned int	f1xy
	)

inlinestatic

Returns the interpolated sum of square difference for one 2 channel 16 bit pixel.

Parameters

pixel0	Upper left pixel in the first frame
pixel1	Upper left pixel in the second frame
size0	Size of one frame row in bytes
size1	Size of one frame row in bytes
f0x_y_	Product of the inverse fx and the inverse fy interpolation factor for the first image
f0xy_	Product of the fx and the inverse fy interpolation factor for the first image
f0x_y	Product of the inverse fx and the fy interpolation factor for the first image
f0xy	Product of the fx and the fy interpolation factor for the first image
f1x_y_	Product of the inverse fx and the inverse fy interpolation factor for the second image
f1xy_	Product of the fx and the inverse fy interpolation factor for the second image
f1x_y	Product of the inverse fx and the fy interpolation factor for the second image
f1xy	Product of the fx and the fy interpolation factor for the second image

Returns: Interpolated sum of square difference

◆ ssd2Channel16Bit1x1() [2/2]

unsigned int Ocean::CV::SSE::ssd2Channel16Bit1x1	(	const uint8_t *const	pixel0,
		const uint8_t *const	pixel1,
		const unsigned int	size0,
		const unsigned int	size1,
		const unsigned int	f1x_y_,
		const unsigned int	f1xy_,
		const unsigned int	f1x_y,
		const unsigned int	f1xy
	)

inlinestatic

Returns the interpolated sum of square difference for one 2 channel 16 bit pixel.

Parameters

pixel0	Upper left pixel in the first frame
pixel1	Upper left pixel in the second frame
size0	Size of one frame row in bytes
size1	Size of one frame row in bytes
f1x_y_	Product of the inverse fx and the inverse fy interpolation factor for the second image
f1xy_	Product of the fx and the inverse fy interpolation factor for the second image
f1x_y	Product of the inverse fx and the fy interpolation factor for the second image
f1xy	Product of the fx and the fy interpolation factor for the second image

Returns: Interpolated sum of square difference

◆ store128i()

void Ocean::CV::SSE::store128i	(	const __m128i &	value,
		uint8_t *const	buffer
	)

inlinestatic

Stores a 128i value to the memory.

Parameters

value	Value to be stored
buffer	Buffer receiving the value (does not need to be aligned on any particular boundary)

◆ sum1Channel8Bit16Elements() [1/2]

__m128i Ocean::CV::SSE::sum1Channel8Bit16Elements ( const __m128i & elements )

inlinestatic

Sums 16 elements with 8 bit per element.

The results are stored as first 32 bit integer value (high bits left, low bits right): ???? ???? ???? 0000.

Parameters

elements 16 elements holding the image data

Returns: Resulting sums

◆ sum1Channel8Bit16Elements() [2/2]

__m128i Ocean::CV::SSE::sum1Channel8Bit16Elements ( const uint8_t * elements )

inlinestatic

Sums 16 elements with 8 bit per element.

The results are stored as first 32 bit integer value (high bits left, low bits right): ???? ???? ???? 0000.

Parameters

elements 16 elements holding the image data

Returns: Resulting sums

◆ sum1Channel8BitBack15Elements()

__m128i Ocean::CV::SSE::sum1Channel8BitBack15Elements ( const uint8_t * elements )

inlinestatic

Sums the last 15 elements of a 16 elements buffer with 8 bit per element, the beginning 1 element is interpreted as zero.

However, the provided buffer must be at least 16 bytes large as the entire 16 bytes will be loaded to the SSE register.
Thus, this function handles one buffer with this pattern (while the memory starts left and ends right): [NA 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15]. The results are stored as first 32 bit integer value (high bits left, low bits right): ???? ???? ???? 0000.

Parameters

elements (1+) 15 elements holding the image data

Returns: Resulting sum

◆ sum1Channel8BitFront15Elements()

template<bool tBufferHas16Bytes>

__m128i Ocean::CV::SSE::sum1Channel8BitFront15Elements ( const uint8_t * elements )

inlinestatic

Sums the first 15 elements of a buffer with 8 bit per element.

This function supports to load the 15 elements from a buffer with only 15 bytes or with a buffer with at least 16 bytes.
If the provided buffer holds at least 16 bytes the load function is much faster compared to the case if the buffer is not larger than 15 bytes.
The results are stored as first 32 bit integer value (high bits left, low bits right): ???? ???? ???? 0000.

Parameters

elements 15 elements holding the image data (the buffer holds either exactly 15 bytes or at least 16 bytes, depending on tBufferHas16Bytes)

Returns: Resulting sums

Template Parameters

tBufferHas16Bytes True, if the buffer holds at least 16 bytes; False, if the buffer holds only 15 bytes

◆ sum_f32_4()

OCEAN_FORCE_INLINE float Ocean::CV::SSE::sum_f32_4 ( const __m128 & value )

static

Adds the four (all four) individual 32 bit float values of a m128 value and returns the result.

Parameters

value The value which elements will be added

Returns: The resulting sum value

◆ sum_f64_2()

OCEAN_FORCE_INLINE double Ocean::CV::SSE::sum_f64_2 ( const __m128d & value )

static

Adds the two (all two) individual 64 bit float values of a m128d value and returns the result.

Parameters

value The value which elements will be added

Returns: The resulting sum value

◆ sum_u32_4()

OCEAN_FORCE_INLINE unsigned int Ocean::CV::SSE::sum_u32_4 ( const __m128i & value )

static

Adds the four (all four) individual 32 bit unsigned integer values of a m128i value and returns the result.

Parameters

value The value which elements will be added

Returns: The resulting sum value

◆ sum_u32_first_2()

unsigned int Ocean::CV::SSE::sum_u32_first_2 ( const __m128i & value )

inlinestatic

Adds the first two individual 32 bit unsigned integer values of a m128i value and returns the result.

Parameters

value The value which elements will be added

Returns: The resulting sum value

◆ sum_u32_first_third()

unsigned int Ocean::CV::SSE::sum_u32_first_third ( const __m128i & value )

inlinestatic

Adds the first and the third 32 bit unsigned integer values of a m128i value and returns the result.

Parameters

value The value which elements will be added

Returns: The resulting sum value

◆ sumAbsoluteDifferences8Bit16Elements()

__m128i Ocean::CV::SSE::sumAbsoluteDifferences8Bit16Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inlinestatic

Sum absolute differences determination for 16 elements of a 16 elements buffer with 8 bit precision.

Parameters

image0	First 16 elements to determine the sad for, may be non aligned
image1	Second 16 elements to determine the sad for, may be non aligned

Returns: SAD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumAbsoluteDifferences8BitBack11Elements()

__m128i Ocean::CV::SSE::sumAbsoluteDifferences8BitBack11Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inlinestatic

Sum absolute differences determination for the last 11 elements of a 16 elements buffer with 8 bit precision.

Parameters

image0	First 11 elements to determine the sad for, may be non aligned
image1	Second 11 elements to determine the sad for, may be non aligned

Returns: SAD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumAbsoluteDifferences8BitFront10Elements()

template<bool tBufferHas16Bytes>

__m128i Ocean::CV::SSE::sumAbsoluteDifferences8BitFront10Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inlinestatic

Sum absolute differences determination for the first 10 elements of a buffer with 8 bit precision.

This function supports to load the 10 elements from a buffer with only 10 bytes or with a buffer with at least 16 bytes.

Parameters

image0	First 10 elements to determine the sad for, may be non aligned
image1	Second 10 elements to determine the sad for, may be non aligned

Returns: SAD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

Template Parameters

tBufferHas16Bytes True, if the buffer holds at least 16 bytes; False, if the buffer holds 10 bytes only

◆ sumAbsoluteDifferences8BitFront15Elements()

template<bool tBufferHas16Bytes>

__m128i Ocean::CV::SSE::sumAbsoluteDifferences8BitFront15Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inlinestatic

Sum absolute differences determination for the first 15 elements of a buffer with 8 bit precision.

This function supports to load the 15 elements from a buffer with only 15 bytes or with a buffer with at least 16 bytes.

Parameters

image0	First 15 elements to determine the sad for, may be non aligned
image1	Second 15 elements to determine the sad for, may be non aligned

Returns: SAD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

Template Parameters

tBufferHas16Bytes True, if the buffer holds at least 16 bytes; False, if the buffer holds 15 bytes only

◆ sumInterleave3Channel8Bit45Elements()

__m128i Ocean::CV::SSE::sumInterleave3Channel8Bit45Elements ( const uint8_t * interleaved )

inlinestatic

Sums 15 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element.

The results are stored in three 32 bit integer values (high bits left, low bits right): ???? 2222 1111 0000.

Parameters

interleaved 45 elements holding the interleaved image data

Returns: Resulting sums

◆ sumInterleave3Channel8Bit48Elements() [1/2]

__m128i Ocean::CV::SSE::sumInterleave3Channel8Bit48Elements	(	const __m128i &	interleaved0,
		const __m128i &	interleaved1,
		const __m128i &	interleaved2
	)

inlinestatic

Sums 16 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element.

The results are stored in three 32 bit integer values (high bits left, low bits right): ???? 2222 1111 0000.

Parameters

interleaved0	First 16 elements holding the interleaved image data
interleaved1	Second 16 elements holding the interleaved image data
interleaved2	Third 16 elements holding the interleaved image data

Returns: Resulting sums

◆ sumInterleave3Channel8Bit48Elements() [2/2]

__m128i Ocean::CV::SSE::sumInterleave3Channel8Bit48Elements ( const uint8_t * interleaved )

inlinestatic

Sums 16 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element.

The results are stored in three 32 bit integer values (high bits left, low bits right): ???? 2222 1111 0000.

Parameters

interleaved 48 elements holding the interleaved image data

Returns: Resulting sums

◆ sumSquareDifference8Bit16Elements() [1/2]

__m128i Ocean::CV::SSE::sumSquareDifference8Bit16Elements	(	const __m128i &	row0,
		const __m128i &	row1
	)

inlinestatic

Sum square difference determination for 16 elements with 8 bit precision.

Parameters

row0	First 16 elements to determine the ssd for
row1	Second 16 elements to determine the ssd for

Returns: SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumSquareDifference8Bit16Elements() [2/2]

__m128i Ocean::CV::SSE::sumSquareDifference8Bit16Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inlinestatic

Sum square difference determination for 16 elements with 8 bit precision.

Parameters

image0	First 16 elements to determine the ssd for, may be non aligned
image1	Second 16 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumSquareDifference8Bit16ElementsAligned16()

__m128i Ocean::CV::SSE::sumSquareDifference8Bit16ElementsAligned16	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inlinestatic

Sum square difference determination for 16 elements with 8 bit precision.

Parameters

image0	First 16 elements to determine the ssd for, must be aligned on a 16 byte boundary
image1	Second 16 elements to determine the ssd for, must be aligned on a 16 byte boundary

Returns: SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumSquareDifference8BitBack12Elements()

__m128i Ocean::CV::SSE::sumSquareDifference8BitBack12Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inlinestatic

Sum square difference determination for the last 12 elements of a 16 elements buffer with 8 bit precision, the beginning 4 elements are interpreted as zero.

However, the provided buffers must be at least 16 bytes large as the entire 16 bytes will be loaded to the SSE registers.
Thus, this function handles two buffers with this pattern (while the memory starts left and ends right): [NA NA NA NA 04 05 06 07 08 09 10 11 12 13 14 15].

Parameters

image0	First (4+) 12 elements to determine the ssd for, with any alignment
image1	Second (4+) 12 elements to determine the ssd for, with any alignment

Returns: SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumSquareDifference8BitBack13Elements()

__m128i Ocean::CV::SSE::sumSquareDifference8BitBack13Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inlinestatic

Sum square difference determination for the last 13 elements of a 16 elements buffer with 8 bit precision, the beginning 3 elements are interpreted as zero.

However, the provided buffers must be at least 16 bytes large as the entire 16 bytes will be loaded to the SSE registers.
Thus, this function handles two buffers with this pattern (while the memory starts left and ends right): [NA NA NA 03 04 05 06 07 08 09 10 11 12 13 14 15].

Parameters

image0	First (3+) 13 elements to determine the ssd for, may be non aligned
image1	Second (3+) 13 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumSquareDifference8BitFront12Elements()

__m128i Ocean::CV::SSE::sumSquareDifference8BitFront12Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inlinestatic

Sum square difference determination for the first 12 elements of a 16 elements buffer with 8 bit precision, the remaining 4 elements are set to zero.

However, the provided buffers must be at least 16 bytes large as the entire 16 bytes will be loaded to the SSE registers.
Thus, this function handles two buffers with this pattern (while the memory starts left and ends right): [00 01 02 03 04 05 06 07 08 09 10 11 NA NA NA NA].

Parameters

image0	First 12 (+4) elements to determine the ssd for, with any alignment
image1	Second 12 (+4) elements to determine the ssd for, with any alignment

Returns: SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumSquareDifference8BitFront13Elements()

template<bool tBufferHas16Bytes>

__m128i Ocean::CV::SSE::sumSquareDifference8BitFront13Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inlinestatic

Sum square difference determination for the first 13 elements of a buffer with 8 bit precision.

This function supports to load the 13 elements from a buffer with only 13 bytes or with a buffer with at least 16 bytes.

Parameters

image0	First 13 elements to determine the ssd for, may be non aligned
image1	Second 13 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

Template Parameters

tBufferHas16Bytes True, if the buffer holds at least 16 bytes; False, if the buffer holds 13 bytes only

◆ sumSquareDifference8BitFront15Elements()

template<bool tBufferHas16Bytes>

__m128i Ocean::CV::SSE::sumSquareDifference8BitFront15Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inlinestatic

Sum square difference determination for the first 15 elements of a buffer with 8 bit precision.

This function supports loading the 15 elements from a buffer with only 15 bytes or from a buffer with at least 16 bytes.

Parameters

image0	First 15 elements to determine the ssd for, may be non aligned
image1	Second 15 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

Template Parameters

tBufferHas16Bytes True, if the buffer holds at least 16 bytes; False, if the buffer holds 15 bytes only

◆ sumSquareDifferences8BitBack11Elements()

__m128i Ocean::CV::SSE::sumSquareDifferences8BitBack11Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inlinestatic

Sum square differences determination for the last 11 elements of a 16 elements buffer with 8 bit precision.

Parameters

image0	First 11 elements to determine the ssd for, may be non aligned
image1	Second 11 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ swapReversedChannelOrder3Channel8Bit48Elements()

void Ocean::CV::SSE::swapReversedChannelOrder3Channel8Bit48Elements	(	uint8_t *	first,
		uint8_t *	second
	)

inlinestatic

Reverses the order of the first and last channel of two sets of 48 elements of an image with 3 interleaved channels and 8 bit per element and further swaps both sets.

Parameters

first	First 48 elements of an image with 3 channels and 8 bit per element (48 bytes)
second	Second 48 elements of an image with 3 channels and 8 bit per element (48 bytes)

◆ swapReversedElements8Bit48Elements()

void Ocean::CV::SSE::swapReversedElements8Bit48Elements	(	uint8_t *	first,
		uint8_t *	second
	)

inlinestatic

Reverses the order of two sets of 48 elements with 8 bit per element and further swaps both sets.

Parameters

first	First 48 elements that will be reversed and swapped with the second 48 elements
second	Second 48 elements that will be reversed and swapped with the first 48 elements

◆ value_u16()

template<unsigned int tIndex>

uint16_t Ocean::CV::SSE::value_u16 ( const __m128i & value )

inlinestatic

Returns one specific 16 bit unsigned integer value of a m128i value object.

Parameters

value The value from which the 16 bit value will be returned

Returns: The requested 16 bit value

Template Parameters

tIndex The index of the requested 16 bit integer value, with range [0, 7]

◆ value_u32()

template<unsigned int tIndex>

unsigned int Ocean::CV::SSE::value_u32 ( const __m128i & value )

inlinestatic

Returns one specific 32 bit unsigned integer value of a m128i value object.

Parameters

value The value from which the 32 bit value will be returned

Returns: The requested 32 bit value

Template Parameters

tIndex The index of the requested 32 bit integer value, with range [0, 3]

◆ value_u8() [1/2]

template<unsigned int tIndex>

uint8_t Ocean::CV::SSE::value_u8 ( const __m128i & value )

inlinestatic

Returns one specific 8 bit unsigned integer value of a m128i value object.

Parameters

value The value from which the 8 bit value will be returned

Returns: The requested 8 bit value

Template Parameters

tIndex The index of the requested 8 bit integer value, with range [0, 15]

◆ value_u8() [2/2]

uint8_t Ocean::CV::SSE::value_u8	(	const __m128i &	value,
		const unsigned int	index
	)

inlinestatic

Returns one specific 8 bit unsigned integer value of a m128i value object.

Parameters

value	The value from which the 8 bit value will be returned
index	The index of the requested 8 bit integer value, with range [0, 15]

Returns: The requested 8 bit value

The documentation for this class was generated from the following file:

SSE.h

Data Structures

Static Public Member Functions

Static Private Member Functions

Detailed Description

Member Function Documentation

◆ addOffsetBeforeRightShiftDivisionByTwoSigned16Bit()

◆ addOffsetBeforeRightShiftDivisionByTwoSigned32Bit()

◆ addOffsetBeforeRightShiftDivisionSigned16Bit()

◆ addOffsetBeforeRightShiftDivisionSigned32Bit()

◆ average16Elements1Channel8Bit2x2()

◆ average16Elements2Channel16Bit2x2()

◆ average16Elements4Channel32Bit2x2()

◆ average16ElementsBinary1Channel8Bit2x2()

◆ average24Elements3Channel24Bit2x2()

◆ average30Elements1Channel8Bit3x3()

◆ average32Elements1Channel8Bit2x2()

◆ average32Elements2Channel16Bit2x2()

◆ average32Elements4Channel32Bit2x2()

◆ average32ElementsBinary1Channel8Bit2x2()

◆ average6Elements3Channel96Bit2x2()

◆ average8Elements1Channel32Bit2x2()

◆ average8Elements1Channel8Bit2x2()

◆ average8Elements2Channel16Bit2x2()

◆ average8Elements2Channel64Bit2x2()

◆ average8Elements4Channel128Bit2x2()

◆ average8ElementsBinary1Channel8Bit2x2()

◆ bitMaskRemoveHigh16_8()

◆ bitMaskRemoveHigh32_16()

◆ deInterleave3Channel8Bit15Elements()

◆ deInterleave3Channel8Bit24Elements()

◆ deInterleave3Channel8Bit45Elements()

◆ deInterleave3Channel8Bit48Elements() [1/3]

◆ deInterleave3Channel8Bit48Elements() [2/3]

◆ deInterleave3Channel8Bit48Elements() [3/3]

◆ divideByRightShiftSigned16Bit()

◆ divideByRightShiftSigned32Bit()

◆ gradientHorizontalVertical8Elements1Channel8Bit()

◆ gradientHorizontalVertical8Elements3Products1Channel8Bit()

◆ interleave3Channel8Bit48Elements() [1/2]

◆ interleave3Channel8Bit48Elements() [2/2]

◆ interpolation1Channel8Bit15Elements()

◆ interpolation1Channel8Bit8Elements()

◆ interpolation2Channel16Bit1x1()

◆ interpolation2Channel16Bit8Elements()

◆ interpolation3Channel24Bit12Elements()

◆ interpolation3Channel24Bit8Elements()

◆ interpolation4Channel32Bit2x4Elements()

◆ interpolation4Channel32Bit8Elements()

◆ load128i()

◆ load128iLower64()

◆ load_u8_10_upper_zero() [1/2]

◆ load_u8_10_upper_zero() [2/2]

◆ load_u8_13_lower_random() [1/2]

◆ load_u8_13_lower_random() [2/2]

◆ load_u8_15_lower_random() [1/2]

◆ load_u8_15_lower_random() [2/2]

◆ load_u8_15_lower_zero() [1/2]

◆ load_u8_15_lower_zero() [2/2]

◆ load_u8_15_upper_zero() [1/2]

◆ load_u8_15_upper_zero() [2/2]

◆ load_u8_16_and_shift_right()

◆ moveHighBits16_8()

◆ moveHighBits16_8_5()

◆ moveHighBits16_8_6()

◆ moveHighBits16_8_7()

◆ moveHighBits32_16()

◆ moveLowBits16_8ToHigh64()

◆ moveLowBits16_8ToLow64()

◆ moveLowBits32_16ToLow64()

◆ moveLowBits32_8ToLow32()

◆ multiplyInt8x16ToInt32x8()

◆ multiplyInt8x16ToInt32x8AndAccumulate()

◆ prefetchNTA()

◆ prefetchT0()

◆ prefetchT1()

◆ prefetchT2()

◆ removeHighBits16_8()

◆ removeHighBits16_8_7_lower()

◆ removeHighBits16_8_7_upper()

◆ removeHighBits32_16()