No Matches
Ocean::CV::SSE Class Reference

This class implements computer vision functions using SSE extensions. More...

#include <SSE.h>

Data Structures

union  M128
 This union defines a wrapper for the __m128 SSE intrinsic data type. More...
union  M128d
 This union defines a wrapper for the __m128d SSE intrinsic data type. More...
union  M128i
 This union defines a wrapper for the __m128i SSE intrinsic data type. More...

Static Public Member Functions

static void prefetchT0 (const void *const data)
 Prefetches a block of temporal memory into all cache levels.
static void prefetchT1 (const void *const data)
 Prefetches a block of temporal memory in all cache levels except 0th cache level.
static void prefetchT2 (const void *const data)
 Prefetches a block of temporal memory in all cache levels, except 0th and 1st cache levels.
static void prefetchNTA (const void *const data)
 Prefetches a block of non-temporal memory into non-temporal cache structure.
template<unsigned int tIndex>
static uint8_t value_u8 (const __m128i &value)
 Returns one specific 8 bit unsigned integer value of a m128i value object.
static uint8_t value_u8 (const __m128i &value, const unsigned int index)
 Returns one specific 8 bit unsigned integer value of a m128i value object.
template<unsigned int tIndex>
static uint16_t value_u16 (const __m128i &value)
 Returns one specific 16 bit unsigned integer value of a m128i value object.
template<unsigned int tIndex>
static unsigned int value_u32 (const __m128i &value)
 Returns one specific 32 bit unsigned integer value of a m128i value object.
static OCEAN_FORCE_INLINE unsigned int sum_u32_4 (const __m128i &value)
 Adds the four (all four) individual 32 bit unsigned integer values of a m128i value and returns the result.
static unsigned int sum_u32_first_2 (const __m128i &value)
 Adds the first two individual 32 bit unsigned integer values of a m128i value and returns the result.
static unsigned int sum_u32_first_third (const __m128i &value)
 Adds the first and the third 32 bit unsigned integer values of a m128i value and returns the result.
static OCEAN_FORCE_INLINE float sum_f32_4 (const __m128 &value)
 Adds the four (all four) individual 32 bit float values of a m128 value and returns the result.
static OCEAN_FORCE_INLINE double sum_f64_2 (const __m128d &value)
 Adds the two (all two) individual 64 bit float values of a m128d value and returns the result.
static __m128i sumSquareDifferences8BitBack11Elements (const uint8_t *const image0, const uint8_t *const image1)
 Sum square differences determination for the last 11 elements of a 16 elements buffer with 8 bit precision.
static __m128i sumSquareDifference8BitFront12Elements (const uint8_t *const image0, const uint8_t *const image1)
 Sum square difference determination for the first 12 elements of a 16 elements buffer with 8 bit precision, the remaining 4 elements are set to zero.
static __m128i sumSquareDifference8BitBack12Elements (const uint8_t *const image0, const uint8_t *const image1)
 Sum square difference determination for the last 12 elements of a 16 elements buffer with 8 bit precision, the beginning 4 elements are interpreted as zero.
template<bool tBufferHas16Bytes>
static __m128i sumSquareDifference8BitFront13Elements (const uint8_t *const image0, const uint8_t *const image1)
 Sum square difference determination for the first 13 elements of a buffer with 8 bit precision.
static __m128i sumSquareDifference8BitBack13Elements (const uint8_t *const image0, const uint8_t *const image1)
 Sum square difference determination for the last 13 elements of a 16 elements buffer with 8 bit precision, the beginning 3 elements are interpreted as zero.
template<bool tBufferHas16Bytes>
static __m128i sumSquareDifference8BitFront15Elements (const uint8_t *const image0, const uint8_t *const image1)
 Sum square difference determination for the first 15 elements of a buffer with 8 bit precision.
static __m128i sumSquareDifference8Bit16Elements (const uint8_t *const image0, const uint8_t *const image1)
 Sum square difference determination for 16 elements with 8 bit precision.
static __m128i sumSquareDifference8Bit16ElementsAligned16 (const uint8_t *const image0, const uint8_t *const image1)
 Sum square difference determination for 16 elements with 8 bit precision.
static __m128i sumSquareDifference8Bit16Elements (const __m128i &row0, const __m128i &row1)
 Sum square difference determination for 16 elements with 8 bit precision.
static void average8Elements1Channel32Bit2x2 (const float *const image0, const float *const image1, float *const result)
 Averages 8 elements of 2x2 blocks for 1 channel 32 bit frames.
static void average8Elements1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
 Averages 8 elements of 2x2 blocks for 1 channel 8 bit frames.
static void average8ElementsBinary1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result, const uint16_t threshold=776u)
 Averages 8 elements of 2x2 blocks for 1 binary (0x00 or 0xFF) frames.
static void average16Elements1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
 Averages 16 elements of 2x2 blocks for 1 channel 8 bit frames.
static void average16ElementsBinary1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result, const uint16_t threshold=776u)
 Averages 16 elements of 2x2 blocks for 1 binary (0x00 or 0xFF) frames.
static void average32Elements1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
 Averages 32 elements of 2x2 blocks for 1 channel 8 bit frames.
static void average32ElementsBinary1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result, const uint16_t threshold=776u)
 Averages 32 elements of 2x2 blocks for 1 binary (0x00 or 0xFF) frames.
static void average8Elements2Channel16Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
 Averages 8 elements of 2x2 blocks for 2 channel 16 bit frames.
static void average8Elements2Channel64Bit2x2 (const float *const image0, const float *const image1, float *const result)
 Averages 8 elements of 2x2 blocks for 2 channel 64 bit frames.
static void average16Elements2Channel16Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
 Averages 16 elements of 2x2 blocks for 2 channel 16 bit frames.
static void average32Elements2Channel16Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
 Averages 32 elements of 2x2 blocks for 2 channel 16 bit frames.
static void average6Elements3Channel96Bit2x2 (const float *const image0, const float *const image1, float *const result)
 Averages 6 elements of 2x2 blocks for 3 channel 96 bit frames.
static void average24Elements3Channel24Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
 Averages 24 elements of 2x2 blocks for 3 channel 24 bit frames.
static void average8Elements4Channel128Bit2x2 (const float *const image0, const float *const image1, float *const result)
 Averages 8 elements of 2x2 blocks for 4 channel 128 bit frames.
static void average16Elements4Channel32Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
 Averages 16 elements of 2x2 blocks for 4 channel 32 bit frames.
static void average32Elements4Channel32Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result)
 Averages 32 elements of 2x2 blocks for 4 channel 32 bit frames.
static void average30Elements1Channel8Bit3x3 (const uint8_t *const image0, const uint8_t *const image1, const uint8_t *const image2, uint8_t *const result)
 Averages 30 elements of 3x3 blocks for 1 channel 8 bit frames.
static __m128i addOffsetBeforeRightShiftDivisionByTwoSigned16Bit (const __m128i &value)
 Adds 1 to each signed 16 bit value which is both, negative and odd, so that each value can be right shifted by one bit to allow a correct division by two.
static __m128i addOffsetBeforeRightShiftDivisionSigned16Bit (const __m128i &value, const unsigned int rightShifts)
 Adds 2^shifts - 1 to each negative signed 16 bit value, so that each value can be right shifted to allow a correct division by 2^shifts.
static __m128i divideByRightShiftSigned16Bit (const __m128i &value, const unsigned int rightShifts)
 Divides eight signed 16 bit values by applying a right shift.
static __m128i addOffsetBeforeRightShiftDivisionByTwoSigned32Bit (const __m128i &value)
 Adds 1 to each signed 32 bit value which is both, negative and odd, so that each value can be right shifted by one bit to allow a correct division by two.
static __m128i addOffsetBeforeRightShiftDivisionSigned32Bit (const __m128i &value, const unsigned int rightShifts)
 Adds 2^shifts - 1 to each negative signed 32 bit value, so that each value can be right shifted to allow a correct division by 2^shifts.
static __m128i divideByRightShiftSigned32Bit (const __m128i &value, const unsigned int rightShifts)
 Divides four signed 32 bit values by applying a right shift.
static void gradientHorizontalVertical8Elements1Channel8Bit (const uint8_t *source, int8_t *response, const unsigned int width)
 Determines the horizontal and the vertical gradients for 16 following pixels for a given 1 channel 8 bit frame.
static void gradientHorizontalVertical8Elements3Products1Channel8Bit (const uint8_t *source, int16_t *response, const unsigned int width)
 Determines the squared horizontal and vertical gradients and the product of both gradients for 16 following pixels for a given 1 channel 8 bit frame.
static __m128i sumAbsoluteDifferences8BitBack11Elements (const uint8_t *const image0, const uint8_t *const image1)
 Sum absolute differences determination for the last 11 elements of a 16 elements buffer with 8 bit precision.
template<bool tBufferHas16Bytes>
static __m128i sumAbsoluteDifferences8BitFront10Elements (const uint8_t *const image0, const uint8_t *const image1)
 Sum absolute differences determination for the first 10 elements of a buffer with 8 bit precision.
template<bool tBufferHas16Bytes>
static __m128i sumAbsoluteDifferences8BitFront15Elements (const uint8_t *const image0, const uint8_t *const image1)
 Sum absolute differences determination for the first 15 elements of a buffer with 8 bit precision.
static __m128i interpolation1Channel8Bit8Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy)
 Interpolates 8 elements of 2x2 blocks for 1 channel 8 bit frames.
static __m128i interpolation2Channel16Bit8Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy)
 Interpolates 8 elements of 2x2 blocks for 2 channel 16 bit frames.
static __m128i interpolation3Channel24Bit8Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy)
 Interpolates 8 elements of 2x2 blocks for 3 channel 24 bit frames.
static __m128i interpolation1Channel8Bit15Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_fxfy_, const __m128i &fx_fyfxfy)
 Interpolates 15 elements of 2x2 blocks for 1 channel 8 bit frames.
static __m128i interpolation3Channel24Bit12Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_fxfy_, const __m128i &fx_fyfxfy)
 Interpolates 12 elements of 2x2 blocks for 3 channel 24 bit frames.
static __m128i interpolation4Channel32Bit8Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy)
 Interpolates 8 elements of 2x2 blocks for 4 channel 32 bit frames.
static __m128i interpolation4Channel32Bit2x4Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy)
 Interpolates 2x4 elements (two separated blocks of 4 elements) of 2x2 blocks for 4 channel 32 bit frames.
static unsigned int ssd2Channel16Bit1x1 (const uint8_t *const pixel0, const uint8_t *const pixel1, const unsigned int size0, const unsigned int size1, const unsigned int f1x_y_, const unsigned int f1xy_, const unsigned int f1x_y, const unsigned int f1xy)
 Returns the interpolated sum of square difference for one 2 channel 16 bit pixel.
static unsigned int ssd2Channel16Bit1x1 (const uint8_t *const pixel0, const uint8_t *const pixel1, const unsigned int size0, const unsigned int size1, const unsigned int f0x_y_, const unsigned int f0xy_, const unsigned int f0x_y, const unsigned int f0xy, const unsigned int f1x_y_, const unsigned int f1xy_, const unsigned int f1x_y, const unsigned int f1xy)
 Returns the interpolated sum of square difference for one 2 channel 16 bit pixel.
static __m128i sumAbsoluteDifferences8Bit16Elements (const uint8_t *const image0, const uint8_t *const image1)
 Sum absolute differences determination for 16 elements of a 16 elements buffer with 8 bit precision.
static OCEAN_FORCE_INLINE void deInterleave3Channel8Bit15Elements (const __m128i &interleaved, __m128i &channel01, __m128i &channel2)
 Deinterleaves 15 elements of e.g., an image with 3 channels and 8 bit per element.
static OCEAN_FORCE_INLINE void deInterleave3Channel8Bit24Elements (const __m128i &interleavedA, const __m128i &interleavedB, __m128i &channel01, __m128i &channel2)
 Deinterleaves 24 elements of e.g., an image with 3 channels and 8 bit per element.
static OCEAN_FORCE_INLINE void deInterleave3Channel8Bit48Elements (const __m128i &interleavedA, const __m128i &interleavedB, const __m128i &interleavedC, __m128i &channel0, __m128i &channel1, __m128i &channel2)
 Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
static void deInterleave3Channel8Bit48Elements (const uint8_t *interleaved, __m128i &channel0, __m128i &channel1, __m128i &channel2)
 Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
static void deInterleave3Channel8Bit48Elements (const uint8_t *interleaved, uint8_t *channel0, uint8_t *channel1, uint8_t *channel2)
 Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
static void deInterleave3Channel8Bit45Elements (const uint8_t *interleaved, __m128i &channel0, __m128i &channel1, __m128i &channel2)
 Deinterleaves 45 elements of e.g., an image with 3 channels and 8 bit per element.
static OCEAN_FORCE_INLINE void interleave3Channel8Bit48Elements (const __m128i &channel0, const __m128i &channel1, const __m128i &channel2, __m128i &interleavedA, __m128i &interleavedB, __m128i &interleavedC)
 Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
static OCEAN_FORCE_INLINE void interleave3Channel8Bit48Elements (const uint8_t *const channel0, const uint8_t *const channel1, const uint8_t *const channel2, uint8_t *const interleaved)
 Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
static OCEAN_FORCE_INLINE void reverseChannelOrder2Channel8Bit32Elements (const uint8_t *interleaved, uint8_t *reversedInterleaved)
 Reverses the order of the channels of 16 pixels (32 elements) of an image with 2 interleaved channels and 8 bit per element (e.g., YA16 to AY16).
static OCEAN_FORCE_INLINE void reverseChannelOrder3Channel8Bit48Elements (const __m128i &interleaved0, const __m128i &interleaved1, const __m128i &interleaved2, __m128i &reversedInterleaved0, __m128i &reversedInterleaved1, __m128i &reversedInterleaved2)
 Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channels and 8 bit per element.
static OCEAN_FORCE_INLINE void reverseChannelOrder3Channel8Bit48Elements (const uint8_t *interleaved, uint8_t *reversedInterleaved)
 Reverses the order of the first and last channel of 48 elements (16 pixels) of an image with 3 interleaved channels and 8 bit per element (e.g., RGB24 to BGR24).
static OCEAN_FORCE_INLINE void reverseChannelOrder4Channel8Bit64Elements (const uint8_t *interleaved, uint8_t *reversedInterleaved)
 Reverses the order of the channels of 16 pixels (64 elements) of an image with 4 interleaved channels and 8 bit per element (e.g., RGBA32 to ABGR32).
static void reverseChannelOrder3Channel8Bit48Elements (uint8_t *interleaved)
 Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channels and 8 bit per element (in place).
static void swapReversedChannelOrder3Channel8Bit48Elements (uint8_t *first, uint8_t *second)
 Reverses the order of the first and last channel of two sets of 48 elements of an image with 3 interleaved channels and 8 bit per element and further swaps both sets.
static void reverseElements8Bit48Elements (const __m128i &elements0, const __m128i &elements1, const __m128i &elements2, __m128i &reversedElements0, __m128i &reversedElements1, __m128i &reversedElements2)
 Reverses the order of 48 elements with 8 bit per element.
static void reverseElements8Bit48Elements (const uint8_t *elements, uint8_t *reversedElements)
 Reverses the order of 48 elements with 8 bit per element.
static void reverseElements8Bit48Elements (uint8_t *elements)
 Reverses the order of 48 elements with 8 bit per element (in place).
static void swapReversedElements8Bit48Elements (uint8_t *first, uint8_t *second)
 Reverses the order of two sets of 48 elements with 8 bit per element and further swaps both sets.
static void shiftChannelToFront4Channel32Bit (const uint8_t *elements, uint8_t *shiftedElements)
 Shifts the channels of a 4 channel 32 bit pixels to the front and moves the front channel to the back channel.
static void shiftAndMirrorChannelToFront4Channel32Bit (const uint8_t *elements, uint8_t *shiftedElements)
 Shifts the channels of a 4 channel 32 bit pixels to the front and moves the front channel to the back channel and mirrors the four individual pixels.
static void shiftChannelToBack4Channel32Bit (const uint8_t *elements, uint8_t *shiftedElements)
 Shifts the channels of a 4 channel 32 bit pixels to the back and moves the back channel to the front channel.
static void shiftAndMirrorChannelToBack4Channel32Bit (const uint8_t *elements, uint8_t *shiftedElements)
 Shifts the channels of a 4 channel 32 bit pixels to the back and moves the back channel to the front channel and mirrors the four individual pixels.
static __m128i sum1Channel8Bit16Elements (const __m128i &elements)
 Sums 16 elements with 8 bit per element.
static __m128i sum1Channel8Bit16Elements (const uint8_t *elements)
 Sums 16 elements with 8 bit per element.
template<bool tBufferHas16Bytes>
static __m128i sum1Channel8BitFront15Elements (const uint8_t *elements)
 Sums the first 15 elements of a buffer with 8 bit per element.
static __m128i sum1Channel8BitBack15Elements (const uint8_t *elements)
 Sums the last 15 elements of a 16 elements buffer with 8 bit per element, the beginning 1 element is interpreted as zero.
static __m128i sumInterleave3Channel8Bit48Elements (const __m128i &interleaved0, const __m128i &interleaved1, const __m128i &interleaved2)
 Sums 16 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element.
static __m128i sumInterleave3Channel8Bit48Elements (const uint8_t *interleaved)
 Sums 16 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element.
static __m128i sumInterleave3Channel8Bit45Elements (const uint8_t *interleaved)
 Sums 15 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element.
static __m128i load128iLower64 (const void *const buffer)
 Loads the lower 64 bit of a 128i value from the memory.
static __m128i load128i (const void *const buffer)
 Loads a 128i value from the memory.
template<bool tBufferHas16Bytes>
static __m128i load_u8_10_upper_zero (const uint8_t *const buffer)
 Loads 10 bytes from memory, which holds either at least 16 bytes or exactly 10 bytes, to a 128i value and sets the remaining bytes of the resulting 128i value to zero.
template<bool tBufferHas16Bytes>
static __m128i load_u8_15_upper_zero (const uint8_t *const buffer)
 Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value and sets the remaining byte of the resulting 128i value to zero.
template<bool tBufferHas16Bytes>
static __m128i load_u8_13_lower_random (const uint8_t *const buffer)
 Loads 13 bytes from memory, which holds either at least 16 bytes or exactly 13 bytes, to a 128i value while the remaining bytes of the resulting 128i value will be random.
template<bool tBufferHas16Bytes>
static __m128i load_u8_15_lower_zero (const uint8_t *const buffer)
 Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value and sets the remaining byte of the resulting 128i value to zero.
template<bool tBufferHas16Bytes>
static __m128i load_u8_15_lower_random (const uint8_t *const buffer)
 Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value while the remaining byte of the resulting 128i value will be random.
template<unsigned int tShiftBytes>
static __m128i load_u8_16_and_shift_right (const uint8_t *const buffer)
 Loads 16 bytes from memory which is at least 16 bytes large and shifts the 128i value by a specified number of bytes to the right (by inserting zeros).
static void store128i (const __m128i &value, uint8_t *const buffer)
 Stores a 128i value to the memory.
static __m128i set128i (const unsigned long long high64, const unsigned long long low64)
 Sets a 128i value by two 64 bit values.
static __m128i removeHighBits32_16 (const __m128i &value)
 Removes the higher 16 bits of four 32 bit elements.
static __m128i removeLowBits32_16 (const __m128i &value)
 Removes the lower 16 bits of four 32 bit elements.
static __m128i removeHighBits16_8 (const __m128i &value)
 Removes the higher 8 bits of eight 16 bit elements.
static __m128i removeHighBits16_8_7_lower (const __m128i &value)
 Removes the higher 8 bits of eight 16 bit elements and sets the upper two bytes to zero.
static __m128i removeHighBits16_8_7_upper (const __m128i &value)
 Removes the higher 8 bits of eight 16 bit elements and sets the lower two bytes to zero.
static __m128i moveLowBits16_8ToLow64 (const __m128i &value)
 Moves the lower 8 bits of eight 16 bit elements to the lower 64 bits and fills the high 64 bits with 0.
static __m128i moveLowBits32_8ToLow32 (const __m128i &value)
 Moves the lower 8 bits of four 32 bit elements to the lower 32 bits and fills the high 96 bits with 0.
static __m128i moveLowBits32_16ToLow64 (const __m128i &value)
 Moves the lower 16 bits of four 32 bit elements to the lower 64 bits and fills the high 64 bits with 0.
static __m128i moveLowBits16_8ToHigh64 (const __m128i &value)
 Moves the lower 8 bits of eight 16 bit elements to the higher 64 bits and fills the low 64 bits with 0.
static __m128i moveHighBits32_16 (const __m128i &value)
 Moves the higher 16 bits of four 32 bit elements to the lower 16 bits and fills the high bits with 0.
static __m128i moveHighBits16_8 (const __m128i &value)
 Moves the higher 8 bits of eight 16 bit elements to the lower 8 bits and fills the high bits with 0.
static __m128i moveHighBits16_8_5 (const __m128i &value)
 Moves the higher 8 bits of five 16 bit elements to the lower 8 bits and fills the high bits with 0.
static __m128i moveHighBits16_8_6 (const __m128i &value)
 Moves the higher 8 bits of six 16 bit elements to the lower 8 bits and fills the high bits with 0.
static __m128i moveHighBits16_8_7 (const __m128i &value)
 Moves the higher 8 bits of seven 16 bit elements to the lower 8 bits and fills the high bits with 0.
static __m128i shuffleLow32ToLow32_8 (const __m128i &value)
 Shuffles the lower four 8 bits to the low 8 bits of four 32 bit elements.
static __m128i shuffleNeighbor4Low64BitsToLow16_8 (const __m128i &value)
 Shuffles pairs of four neighbors of the low 64 bits to the low 8 bits of eight 16 bit elements.
static __m128i shuffleNeighbor4High64BitsToLow16_8 (const __m128i &value)
 Shuffles pairs of four neighbors of the high 64 bits to the low 8 bits of eight 16 bit elements.
static __m128i shuffleNeighbor2Low64BitsToLow16_8 (const __m128i &value)
 Shuffles pairs of two neighbors of the low 64 bits to the low 8 bits of eight 16 bit elements.
static __m128i shuffleNeighbor2High64BitsToLow16_8 (const __m128i &value)
 Shuffles pairs of two neighbors of the high 64 bits to the low 8 bits of eight 16 bit elements.
static __m128i bitMaskRemoveHigh16_8 ()
 Returns the following 128 bit mask: 0x00FF00FF-00FF00FF-00FF00FF-00FF00FF.
static __m128i bitMaskRemoveHigh32_16 ()
 Returns the following 128 bit mask: 0x0000FFFF-0000FFFF-0000FFFF-0000FFFF.
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8 (const __m128i &values0, const __m128i &values1, __m128i &products0, __m128i &products1)
 Multiplies 8 int16_t values with 8 int16_t values and returns the products as 8 int32_t results.
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8AndAccumulate (const __m128i &values0, const __m128i &values1, __m128i &results0, __m128i &results1)
 Multiplies 8 int16_t values with 8 int16_t values and adds the products to 8 int32_t values.
__m128i load_u8_10_upper_zero (const uint8_t *const buffer)
__m128i load_u8_15_upper_zero (const uint8_t *const buffer)
__m128i load_u8_13_lower_random (const uint8_t *const buffer)
__m128i load_u8_15_lower_zero (const uint8_t *const buffer)
__m128i load_u8_15_lower_random (const uint8_t *const buffer)

Static Private Member Functions

static unsigned int interpolation2Channel16Bit1x1 (const uint8_t *const pixel, const unsigned int size, const unsigned int fx_y_, const unsigned int fxy_, const unsigned int fx_y, const unsigned int fxy)
 Returns the interpolated pixel values for one 2 channel 16 bit pixel.

Detailed Description

This class implements computer vision functions using SSE extensions.

Member Function Documentation

◆ addOffsetBeforeRightShiftDivisionByTwoSigned16Bit()

__m128i Ocean::CV::SSE::addOffsetBeforeRightShiftDivisionByTwoSigned16Bit ( const __m128i &  value)

Adds 1 to each signed 16 bit value which is both, negative and odd, so that each value can be right shifted by one bit to allow a correct division by two.

This function must be invoked before the right shift is applied.

valueThe eight signed 16 bit values to be handled
The modified value for which divide (/ 2) and bit shift (>> 1) yield equal (and correct!) results

SSE does not have an intrinsic for integer division, so right bit shift is used instead. Unfortunately, for negative odd integer values v: (v / 2) != (v >> 1) because a right shift rounds towards negative infinity, e.g. -5 / 2 = -2 and -5 >> 1 = -3. As a work-around, an offset of 1 is added to all values that are both, negative and odd.

◆ addOffsetBeforeRightShiftDivisionByTwoSigned32Bit()

__m128i Ocean::CV::SSE::addOffsetBeforeRightShiftDivisionByTwoSigned32Bit ( const __m128i &  value)

Adds 1 to each signed 32 bit value which is both, negative and odd, so that each value can be right shifted by one bit to allow a correct division by two.

This function must be invoked before the right shift is applied.

valueThe four signed 32 bit values to be handled
The modified value for which divide (/ 2) and bit shift (>> 1) yield equal (and correct!) results

SSE does not have an intrinsic for integer division, so right bit shift is used instead. Unfortunately, for negative odd integer values v: (v / 2) != (v >> 1) because a right shift rounds towards negative infinity, e.g. -5 / 2 = -2 and -5 >> 1 = -3. As a work-around, an offset of 1 is added to all values that are both, negative and odd.

◆ addOffsetBeforeRightShiftDivisionSigned16Bit()

__m128i Ocean::CV::SSE::addOffsetBeforeRightShiftDivisionSigned16Bit ( const __m128i &  value,
const unsigned int  rightShifts 

Adds 2^shifts - 1 to each negative signed 16 bit value, so that each value can be right shifted to allow a correct division by 2^shifts.

This function must be invoked before the right shift is applied.

valueThe eight signed 16 bit values to be handled
rightShiftsThe number of right shifts which needs to be applied, with range [0, 15]
The modified value for which division and shift yield equal (and correct!) results

◆ addOffsetBeforeRightShiftDivisionSigned32Bit()

__m128i Ocean::CV::SSE::addOffsetBeforeRightShiftDivisionSigned32Bit ( const __m128i &  value,
const unsigned int  rightShifts 

Adds 2^shifts - 1 to each negative signed 32 bit value, so that each value can be right shifted to allow a correct division by 2^shifts.

This function must be invoked before the right shift is applied.

valueThe four signed 32 bit values to be handled
rightShiftsThe number of right shifts which needs to be applied, with range [0, 31]
The modified value for which division and shift yield equal (and correct!) results

◆ average16Elements1Channel8Bit2x2()

void Ocean::CV::SSE::average16Elements1Channel8Bit2x2 ( const uint8_t *const  image0,
const uint8_t *const  image1,
uint8_t *const  result 

Averages 16 elements of 2x2 blocks for 1 channel 8 bit frames.

The function takes two rows of 16 elements and returns 8 average elements (8 averaged pixels).

image0First row of 16 elements, must be valid
image1Second row of 16 elements, must be valid
resultResulting 8 average elements, must be valid

◆ average16Elements2Channel16Bit2x2()

void Ocean::CV::SSE::average16Elements2Channel16Bit2x2 ( const uint8_t *const  image0,
const uint8_t *const  image1,
uint8_t *const  result 

Averages 16 elements of 2x2 blocks for 2 channel 16 bit frames.

The function takes two rows of 16 elements and returns 8 average elements (4 averaged pixels, each with 2 channels).

image0First row of 16 elements
image1Second row of 16 elements
resultResulting 8 average elements

◆ average16Elements4Channel32Bit2x2()

void Ocean::CV::SSE::average16Elements4Channel32Bit2x2 ( const uint8_t *const  image0,
const uint8_t *const  image1,
uint8_t *const  result 

Averages 16 elements of 2x2 blocks for 4 channel 32 bit frames.

The function takes two rows of 16 elements and returns 8 average elements (2 averaged pixels, each with 4 channels).

image0First row of 16 elements
image1Second row of 16 elements
resultResulting 8 average elements

◆ average16ElementsBinary1Channel8Bit2x2()

void Ocean::CV::SSE::average16ElementsBinary1Channel8Bit2x2 ( const uint8_t *const  image0,
const uint8_t *const  image1,
uint8_t *const  result,
const uint16_t  threshold = 776u 

Averages 16 elements of 2x2 blocks for 1 binary (0x00 or 0xFF) frames.

The function takes two rows of 16 elements and returns 8 average elements (8 averaged pixels).

image0First row of 16 elements, must be valid
image1Second row of 16 elements, must be valid
resultResulting 8 average elements, must be valid
thresholdThe minimal sum value of four pixels to result in a mask with value 255, with range [1, 255 * 4]

◆ average24Elements3Channel24Bit2x2()

void Ocean::CV::SSE::average24Elements3Channel24Bit2x2 ( const uint8_t *const  image0,
const uint8_t *const  image1,
uint8_t *const  result 

Averages 24 elements of 2x2 blocks for 3 channel 24 bit frames.

The function takes two rows of 24 elements and returns 12 average elements (4 averaged pixels, each with 3 channels).

image0First row of 24 elements
image1Second row of 24 elements
resultResulting 12 average elements

◆ average30Elements1Channel8Bit3x3()

void Ocean::CV::SSE::average30Elements1Channel8Bit3x3 ( const uint8_t *const  image0,
const uint8_t *const  image1,
const uint8_t *const  image2,
uint8_t *const  result 

Averages 30 elements of 3x3 blocks for 1 channel 8 bit frames.

The function takes three rows of 30 elements and returns 10 average elements (10 averaged pixels).

image0First row of 30 elements
image1Second row of 30 elements
image2Third row of 30 elements
resultResulting 10 average elements
     | 1 2 1 |
1/16 | 2 4 2 |
     | 1 2 1 |

◆ average32Elements1Channel8Bit2x2()

void Ocean::CV::SSE::average32Elements1Channel8Bit2x2 ( const uint8_t *const  image0,
const uint8_t *const  image1,
uint8_t *const  result 

Averages 32 elements of 2x2 blocks for 1 channel 8 bit frames.

The function takes two rows of 32 elements and returns 16 average elements (16 averaged pixels).

image0First row of 32 elements
image1Second row of 32 elements
resultResulting 16 average elements

◆ average32Elements2Channel16Bit2x2()

void Ocean::CV::SSE::average32Elements2Channel16Bit2x2 ( const uint8_t *const  image0,
const uint8_t *const  image1,
uint8_t *const  result 

Averages 32 elements of 2x2 blocks for 2 channel 16 bit frames.

The function takes two rows of 32 elements and returns 16 average elements (8 averaged pixels, each with 2 channels).

image0First row of 32 elements
image1Second row of 32 elements
resultResulting 16 average elements

◆ average32Elements4Channel32Bit2x2()

void Ocean::CV::SSE::average32Elements4Channel32Bit2x2 ( const uint8_t *const  image0,
const uint8_t *const  image1,
uint8_t *const  result 

Averages 32 elements of 2x2 blocks for 4 channel 32 bit frames.

The function takes two rows of 32 elements and returns 16 average elements (4 averaged pixels, each with 4 channels).

image0First row of 32 elements
image1Second row of 32 elements
resultResulting 16 average elements

◆ average32ElementsBinary1Channel8Bit2x2()

void Ocean::CV::SSE::average32ElementsBinary1Channel8Bit2x2 ( const uint8_t *const  image0,
const uint8_t *const  image1,
uint8_t *const  result,
const uint16_t  threshold = 776u 

Averages 32 elements of 2x2 blocks for 1 channel binary (0x00 or 0xFF) frames.

The function takes two rows of 32 elements and returns 16 average elements (16 averaged pixels).

image0First row of 32 elements, must be valid
image1Second row of 32 elements, must be valid
resultResulting 16 average elements, must be valid
thresholdThe minimal sum value of four pixels to result in a mask with value 255, with range [1, 255 * 4]

◆ average6Elements3Channel96Bit2x2()

void Ocean::CV::SSE::average6Elements3Channel96Bit2x2 ( const float *const  image0,
const float *const  image1,
float *const  result 

Averages 6 elements of 2x2 blocks for 3 channel 96 bit frames.

The function takes two rows of 6 elements and returns 3 average elements (1 averaged pixel with 3 channels).

image0First row of 6 elements
image1Second row of 6 elements
resultResulting 3 average elements

◆ average8Elements1Channel32Bit2x2()

void Ocean::CV::SSE::average8Elements1Channel32Bit2x2 ( const float *const  image0,
const float *const  image1,
float *const  result 

Averages 8 elements of 2x2 blocks for 1 channel 32 bit frames.

The function takes two rows of 8 elements and returns 4 average elements (4 averaged pixels).

image0First row of 8 elements
image1Second row of 8 elements
resultResulting 4 average elements

◆ average8Elements1Channel8Bit2x2()

void Ocean::CV::SSE::average8Elements1Channel8Bit2x2 ( const uint8_t *const  image0,
const uint8_t *const  image1,
uint8_t *const  result 

Averages 8 elements of 2x2 blocks for 1 channel 8 bit frames.

The function takes two rows of 8 elements and returns 4 average elements (4 averaged pixels).

image0First row of 8 elements
image1Second row of 8 elements
resultResulting 4 average elements

◆ average8Elements2Channel16Bit2x2()

void Ocean::CV::SSE::average8Elements2Channel16Bit2x2 ( const uint8_t *const  image0,
const uint8_t *const  image1,
uint8_t *const  result 

Averages 8 elements of 2x2 blocks for 2 channel 16 bit frames.

The function takes two rows of 8 elements and returns 4 average elements (2 averaged pixels, each with 2 channels).

image0First row of 8 elements
image1Second row of 8 elements
resultResulting 4 average elements

◆ average8Elements2Channel64Bit2x2()

void Ocean::CV::SSE::average8Elements2Channel64Bit2x2 ( const float *const  image0,
const float *const  image1,
float *const  result 

Averages 8 elements of 2x2 blocks for 2 channel 64 bit frames.

The function takes two rows of 8 elements and returns 4 average elements (2 averaged pixels).

image0First row of 8 elements
image1Second row of 8 elements
resultResulting 4 average elements

◆ average8Elements4Channel128Bit2x2()

void Ocean::CV::SSE::average8Elements4Channel128Bit2x2 ( const float *const  image0,
const float *const  image1,
float *const  result 

Averages 8 elements of 2x2 blocks for 4 channel 128 bit frames.

The function takes two rows of 8 elements and returns 4 average elements (1 averaged pixel).

image0First row of 8 elements
image1Second row of 8 elements
resultResulting 4 average elements

◆ average8ElementsBinary1Channel8Bit2x2()

void Ocean::CV::SSE::average8ElementsBinary1Channel8Bit2x2 ( const uint8_t *const  image0,
const uint8_t *const  image1,
uint8_t *const  result,
const uint16_t  threshold = 776u 

Averages 8 elements of 2x2 blocks for 1 channel binary (0x00 or 0xFF) frames.

The function takes two rows of 8 elements and returns 4 average elements (4 averaged pixels).

image0First row of 8 elements, must be valid
image1Second row of 8 elements, must be valid
resultResulting 4 average elements, must be valid
thresholdThe minimal sum value of four pixels to result in a mask with value 255, with range [1, 255 * 4]

◆ bitMaskRemoveHigh16_8()

__m128i Ocean::CV::SSE::bitMaskRemoveHigh16_8 ( )

Returns the following 128 bit mask: 0x00FF00FF-00FF00FF-00FF00FF-00FF00FF.


◆ bitMaskRemoveHigh32_16()

__m128i Ocean::CV::SSE::bitMaskRemoveHigh32_16 ( )

Returns the following 128 bit mask: 0x0000FFFF-0000FFFF-0000FFFF-0000FFFF.


◆ deInterleave3Channel8Bit15Elements()

OCEAN_FORCE_INLINE void Ocean::CV::SSE::deInterleave3Channel8Bit15Elements ( const __m128i &  interleaved,
__m128i &  channel01,
__m128i &  channel2 

Deinterleaves 15 elements of e.g., an image with 3 channels and 8 bit per element.

This function converts X CBA CBA CBA CBA CBA to 00000000000CCCCC 000BBBBB000AAAAA.

interleavedThe 15 elements holding the interleaved image data
channel01Resulting first and second channel elements, first 8 elements of the first channel, followed by 8 elements of the second channel
channel2Resulting third channel elements, first 8 elements of the third channel, followed by zeros

◆ deInterleave3Channel8Bit24Elements()

OCEAN_FORCE_INLINE void Ocean::CV::SSE::deInterleave3Channel8Bit24Elements ( const __m128i &  interleavedA,
const __m128i &  interleavedB,
__m128i &  channel01,
__m128i &  channel2 

Deinterleaves 24 elements of e.g., an image with 3 channels and 8 bit per element.


interleavedAFirst 16 elements holding the interleaved image data
interleavedBSecond 16 elements holding the interleaved image data, the first 8 elements will be used only
channel01Resulting first and second channel elements, first 8 elements of the first channel, followed by 8 elements of the second channel
channel2Resulting third channel elements, first 8 elements of the third channel, followed by zeros

◆ deInterleave3Channel8Bit45Elements()

void Ocean::CV::SSE::deInterleave3Channel8Bit45Elements ( const uint8_t *  interleaved,
__m128i &  channel0,
__m128i &  channel1,
__m128i &  channel2 

Deinterleaves 45 elements of e.g., an image with 3 channels and 8 bit per element.

interleaved45 elements of an image with 3 channels and 8 bit per element (45 bytes), must be valid
channel0Resulting first channel holding all elements corresponding to the first channel consecutively
channel1Resulting second channel holding all elements corresponding to the second channel consecutively
channel2Resulting third channel holding all elements corresponding to the third channel consecutively

◆ deInterleave3Channel8Bit48Elements() [1/3]

OCEAN_FORCE_INLINE void Ocean::CV::SSE::deInterleave3Channel8Bit48Elements ( const __m128i &  interleavedA,
const __m128i &  interleavedB,
const __m128i &  interleavedC,
__m128i &  channel0,
__m128i &  channel1,
__m128i &  channel2 

Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.


interleavedAFirst 16 elements holding the interleaved image data
interleavedBSecond 16 elements holding the interleaved image data
interleavedCThird 16 elements holding the interleaved image data
channel0Resulting first channel holding all elements corresponding to the first channel consecutively
channel1Resulting second channel holding all elements corresponding to the second channel consecutively
channel2Resulting third channel holding all elements corresponding to the third channel consecutively

◆ deInterleave3Channel8Bit48Elements() [2/3]

void Ocean::CV::SSE::deInterleave3Channel8Bit48Elements ( const uint8_t *  interleaved,
__m128i &  channel0,
__m128i &  channel1,
__m128i &  channel2 

Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.

interleaved48 elements of an image with 3 channels and 8 bit per element (48 bytes)
channel0Resulting first channel holding all elements corresponding to the first channel consecutively
channel1Resulting second channel holding all elements corresponding to the second channel consecutively
channel2Resulting third channel holding all elements corresponding to the third channel consecutively

◆ deInterleave3Channel8Bit48Elements() [3/3]

void Ocean::CV::SSE::deInterleave3Channel8Bit48Elements ( const uint8_t *  interleaved,
uint8_t *  channel0,
uint8_t *  channel1,
uint8_t *  channel2 

Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.

interleaved48 elements of an image with 3 channels and 8 bit per element (48 bytes), must be valid
channel0Resulting first channel holding all elements corresponding to the first channel consecutively, must be valid
channel1Resulting second channel holding all elements corresponding to the second channel consecutively, must be valid
channel2Resulting third channel holding all elements corresponding to the third channel consecutively, must be valid

◆ divideByRightShiftSigned16Bit()

__m128i Ocean::CV::SSE::divideByRightShiftSigned16Bit ( const __m128i &  value,
const unsigned int  rightShifts 

Divides eight signed 16 bit values by applying a right shift.

This is able to determine the correct division result for positive and negative 16 bit values.

valueThe eight signed 16 bit values to be handled
rightShiftsThe number of right shifts which needs to be applied, with range [0, 15]
The divided values

◆ divideByRightShiftSigned32Bit()

__m128i Ocean::CV::SSE::divideByRightShiftSigned32Bit ( const __m128i &  value,
const unsigned int  rightShifts 

Divides four signed 32 bit values by applying a right shift.

This is able to determine the correct division result for positive and negative 32 bit values.

valueThe four signed 32 bit values to be handled
rightShiftsThe number of right shifts which needs to be applied, with range [0, 31]
The divided values

◆ gradientHorizontalVertical8Elements1Channel8Bit()

void Ocean::CV::SSE::gradientHorizontalVertical8Elements1Channel8Bit ( const uint8_t *  source,
int8_t *  response,
const unsigned int  width 

Determines the horizontal and the vertical gradients for 8 following pixels for a given 1 channel 8 bit frame.

The resulting gradients are interleaved and each response is inside the range [-127, 127] as the standard response is divided by two.

sourceThe source position of the first pixel to determine the gradient for, this pixel must not be a border pixel in the original frame
responseResulting gradient responses, first the horizontal response then the vertical response (zipped) for 8 pixels
widthThe width of the original frame in pixel, with range [10, infinity)

◆ gradientHorizontalVertical8Elements3Products1Channel8Bit()

void Ocean::CV::SSE::gradientHorizontalVertical8Elements3Products1Channel8Bit ( const uint8_t *  source,
int16_t *  response,
const unsigned int  width 

Determines the squared horizontal and vertical gradients and the product of both gradients for 8 following pixels for a given 1 channel 8 bit frame.

The resulting gradients are interleaved and each response is inside the range [-(127 * 127), 127 * 127] as the standard response is divided by two.

sourceThe source position of the first pixel to determine the gradient for, this pixel must not be a border pixel in the original frame
responseResulting gradient responses, first the horizontal response then the vertical response and afterwards the product of horizontal and vertical response (zipped) for 8 pixels
widthThe width of the original frame in pixel, with range [10, infinity)

◆ interleave3Channel8Bit48Elements() [1/2]

OCEAN_FORCE_INLINE void Ocean::CV::SSE::interleave3Channel8Bit48Elements ( const __m128i &  channel0,
const __m128i &  channel1,
const __m128i &  channel2,
__m128i &  interleavedA,
__m128i &  interleavedB,
__m128i &  interleavedC 

Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.


channel0The 16 elements of the first channel to be interleaved
channel1The 16 elements of the second channel to be interleaved
channel2The 16 elements of the third channel to be interleaved
interleavedAResulting first 16 elements of the interleaved data
interleavedBResulting second 16 elements of the interleaved data
interleavedCResulting third 16 elements of the interleaved data

◆ interleave3Channel8Bit48Elements() [2/2]

OCEAN_FORCE_INLINE void Ocean::CV::SSE::interleave3Channel8Bit48Elements ( const uint8_t *const  channel0,
const uint8_t *const  channel1,
const uint8_t *const  channel2,
uint8_t *const  interleaved 

Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.

channel0The 16 elements of the first channel to be interleaved, must be valid
channel1The 16 elements of the second channel to be interleaved, must be valid
channel2The 16 elements of the third channel to be interleaved, must be valid
interleavedThe resulting 48 interleaved elements, must be valid

◆ interpolation1Channel8Bit15Elements()

__m128i Ocean::CV::SSE::interpolation1Channel8Bit15Elements ( const __m128i &  values0,
const __m128i &  values1,
const __m128i &  fx_fy_fxfy_,
const __m128i &  fx_fyfxfy 

Interpolates 15 elements of 2x2 blocks for 1 channel 8 bit frames.

The interpolation is specified by tx and ty with range [0, 128u].

values0First row of 16 elements to be interpolated
values1Second row of 16 elements to be interpolated
fx_fy_fxfy_In each unsigned 16 bit element: ((128u - tx) * (128u - ty)) | (tx * (128u - ty)) << 16
fx_fyfxfyIn each unsigned 16 bit element: (128u - tx) * ty | (tx * ty) << 16
Interpolation result for 15 elements, which are (15 pixels)

◆ interpolation1Channel8Bit8Elements()

__m128i Ocean::CV::SSE::interpolation1Channel8Bit8Elements ( const __m128i &  values0,
const __m128i &  values1,
const __m128i &  fx_fy_,
const __m128i &  fxfy_,
const __m128i &  fx_fy,
const __m128i &  fxfy 

Interpolates 8 elements of 2x2 blocks for 1 channel 8 bit frames.

The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].

values0First row of 9 elements to be interpolated
values1Second row of 9 elements to be interpolated
fx_fy_In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty)
fxfy_In each unsigned 16 bit element: Product of (tx) and (128u - ty)
fx_fyIn each unsigned 16 bit element: Product of (128u - tx) and (ty)
fxfyIn each unsigned 16 bit element: Product of (tx) and (ty)
Interpolation result for 8 elements, which are 8 pixels

◆ interpolation2Channel16Bit1x1()

unsigned int Ocean::CV::SSE::interpolation2Channel16Bit1x1 ( const uint8_t *const  pixel,
const unsigned int  size,
const unsigned int  fx_y_,
const unsigned int  fxy_,
const unsigned int  fx_y,
const unsigned int  fxy 

Returns the interpolated pixel values for one 2 channel 16 bit pixel.

pixelUpper left pixel in the frame
sizeSize of one frame row in bytes
fx_y_Product of the inverse fx and the inverse fy interpolation factor
fxy_Product of the fx and the inverse fy interpolation factor
fx_yProduct of the inverse fx and the fy interpolation factor
fxyProduct of the fx and the fy interpolation factor
Interpolated pixel values

◆ interpolation2Channel16Bit8Elements()

__m128i Ocean::CV::SSE::interpolation2Channel16Bit8Elements ( const __m128i &  values0,
const __m128i &  values1,
const __m128i &  fx_fy_,
const __m128i &  fxfy_,
const __m128i &  fx_fy,
const __m128i &  fxfy 

Interpolates 8 elements of 2x2 blocks for 2 channel 16 bit frames.

The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].

values0First row of 10 elements to be interpolated
values1Second row of 10 elements to be interpolated
fx_fy_In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty)
fxfy_In each unsigned 16 bit element: Product of (tx) and (128u - ty)
fx_fyIn each unsigned 16 bit element: Product of (128u - tx) and (ty)
fxfyIn each unsigned 16 bit element: Product of (tx) and (ty)
Interpolation result for 8 elements, which are 4 pixels

◆ interpolation3Channel24Bit12Elements()

__m128i Ocean::CV::SSE::interpolation3Channel24Bit12Elements ( const __m128i &  values0,
const __m128i &  values1,
const __m128i &  fx_fy_fxfy_,
const __m128i &  fx_fyfxfy 

Interpolates 12 elements of 2x2 blocks for 3 channel 24 bit frames.

The interpolation is specified by tx and ty with range [0, 128u].

values0First row of 15 elements to be interpolated
values1Second row of 15 elements to be interpolated
fx_fy_fxfy_In each unsigned 16 bit element: ((128u - tx) * (128u - ty)) | (tx * (128u - ty)) << 16
fx_fyfxfyIn each unsigned 16 bit element: (128u - tx) * ty | (tx * ty) << 16
Interpolation result for 12 elements, which are (4 pixels)

◆ interpolation3Channel24Bit8Elements()

__m128i Ocean::CV::SSE::interpolation3Channel24Bit8Elements ( const __m128i &  values0,
const __m128i &  values1,
const __m128i &  fx_fy_,
const __m128i &  fxfy_,
const __m128i &  fx_fy,
const __m128i &  fxfy 

Interpolates 8 elements of 2x2 blocks for 3 channel 24 bit frames.

The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].

values0First row of 11 elements to be interpolated
values1Second row of 11 elements to be interpolated
fx_fy_In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty)
fxfy_In each unsigned 16 bit element: Product of (tx) and (128u - ty)
fx_fyIn each unsigned 16 bit element: Product of (128u - tx) and (ty)
fxfyIn each unsigned 16 bit element: Product of (tx) and (ty)
Interpolation result for 8 elements, which are (2 2/3 pixels)

◆ interpolation4Channel32Bit2x4Elements()

__m128i Ocean::CV::SSE::interpolation4Channel32Bit2x4Elements ( const __m128i &  values0,
const __m128i &  values1,
const __m128i &  fx_fy_,
const __m128i &  fxfy_,
const __m128i &  fx_fy,
const __m128i &  fxfy 

Interpolates 2x4 elements (two separated blocks of 4 elements) of 2x2 blocks for 4 channel 32 bit frames.

The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].

values0First row of 16 elements to be interpolated
values1Second row of 16 elements to be interpolated
fx_fy_In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty)
fxfy_In each unsigned 16 bit element: Product of (tx) and (128u - ty)
fx_fyIn each unsigned 16 bit element: Product of (128u - tx) and (ty)
fxfyIn each unsigned 16 bit element: Product of (tx) and (ty)
Interpolation result for 8 elements, which are (2 pixels)

◆ interpolation4Channel32Bit8Elements()

__m128i Ocean::CV::SSE::interpolation4Channel32Bit8Elements ( const __m128i &  values0,
const __m128i &  values1,
const __m128i &  fx_fy_,
const __m128i &  fxfy_,
const __m128i &  fx_fy,
const __m128i &  fxfy 

Interpolates 8 elements of 2x2 blocks for 4 channel 32 bit frames.

The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].

values0First row of 12 elements to be interpolated
values1Second row of 12 elements to be interpolated
fx_fy_In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty)
fxfy_In each unsigned 16 bit element: Product of (tx) and (128u - ty)
fx_fyIn each unsigned 16 bit element: Product of (128u - tx) and (ty)
fxfyIn each unsigned 16 bit element: Product of (tx) and (ty)
Interpolation result for 8 elements, which are (2 pixels)

◆ load128i()

__m128i Ocean::CV::SSE::load128i ( const void *const  buffer)

Loads a 128i value from the memory.

bufferBuffer to be loaded (does not need to be aligned on any particular boundary), ensure that the buffer has a size of at least 16 bytes
Resulting value

◆ load128iLower64()

__m128i Ocean::CV::SSE::load128iLower64 ( const void *const  buffer)

Loads the lower 64 bit of a 128i value from the memory.

The upper 64 bit are zeroed.

bufferBuffer to be loaded (does not need to be aligned on any particular boundary), ensure that the buffer has a size of at least 8 bytes
Resulting value

◆ load_u8_10_upper_zero() [1/2]

template<bool tBufferHas16Bytes>
__m128i Ocean::CV::SSE::load_u8_10_upper_zero ( const uint8_t *const  buffer)

Loads 10 bytes from memory, which holds either at least 16 bytes or exactly 10 bytes, to a 128i value and sets the remaining bytes of the resulting 128i value to zero.

The loaded memory will be stored in the upper 10 bytes of the 128i value while the lowest remaining 6 bytes will be set to zero. Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [09 08 07 06 05 04 03 02 01 00 ZZ ZZ ZZ ZZ ZZ ZZ], with ZZ meaning zero.

bufferBuffer to be loaded (does not need to be aligned on any particular boundary)
Resulting 128 bit value
Template Parameters
tBufferHas16BytesTrue, if the buffer holds at least 16 bytes; False, if the buffer holds only 10 bytes

◆ load_u8_10_upper_zero() [2/2]

__m128i Ocean::CV::SSE::load_u8_10_upper_zero ( const uint8_t *const  buffer)

◆ load_u8_13_lower_random() [1/2]

template<bool tBufferHas16Bytes>
__m128i Ocean::CV::SSE::load_u8_13_lower_random ( const uint8_t *const  buffer)

Loads 13 bytes from memory, which holds either at least 16 bytes or exactly 13 bytes, to a 128i value while the remaining bytes of the resulting 128i value will be random.

The loaded memory will be stored in the lower 13 bytes of the 128i value while the highest remaining 3 bytes will be random.
Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [?? ?? ?? 12 11 10 09 08 07 06 05 04 03 02 01 00], with ?? meaning a random value.

bufferBuffer to be loaded (does not need to be aligned on any particular boundary)
Resulting 128 bit value
Template Parameters
tBufferHas16BytesTrue, if the buffer holds at least 16 bytes; False, if the buffer holds only 13 bytes

◆ load_u8_13_lower_random() [2/2]

__m128i Ocean::CV::SSE::load_u8_13_lower_random ( const uint8_t *const  buffer)

◆ load_u8_15_lower_random() [1/2]

template<bool tBufferHas16Bytes>
__m128i Ocean::CV::SSE::load_u8_15_lower_random ( const uint8_t *const  buffer)

Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value while the remaining byte of the resulting 128i value will be random.

The loaded memory will be stored in the lower 15 bytes of the 128i value while the highest remaining 1 byte will be random.
Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [?? 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00], with ?? meaning a random value.

bufferBuffer to be loaded (does not need to be aligned on any particular boundary)
Resulting 128 bit value
Template Parameters
tBufferHas16BytesTrue, if the buffer holds at least 16 bytes; False, if the buffer holds only 15 bytes

◆ load_u8_15_lower_random() [2/2]

__m128i Ocean::CV::SSE::load_u8_15_lower_random ( const uint8_t *const  buffer)

◆ load_u8_15_lower_zero() [1/2]

template<bool tBufferHas16Bytes>
__m128i Ocean::CV::SSE::load_u8_15_lower_zero ( const uint8_t *const  buffer)

Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value and sets the remaining byte of the resulting 128i value to zero.

The loaded memory will be stored in the lower 15 bytes of the 128i value while the highest remaining 1 byte will be set to zero.
Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [ZZ 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00], with ZZ meaning zero.

bufferBuffer to be loaded (does not need to be aligned on any particular boundary)
Resulting 128 bit value
Template Parameters
tBufferHas16BytesTrue, if the buffer holds at least 16 bytes; False, if the buffer holds only 15 bytes

◆ load_u8_15_lower_zero() [2/2]

__m128i Ocean::CV::SSE::load_u8_15_lower_zero ( const uint8_t *const  buffer)

◆ load_u8_15_upper_zero() [1/2]

template<bool tBufferHas16Bytes>
__m128i Ocean::CV::SSE::load_u8_15_upper_zero ( const uint8_t *const  buffer)

Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value and sets the remaining byte of the resulting 128i value to zero.

The loaded memory will be stored in the upper 15 bytes of the 128i value while the lowest remaining 1 byte will be set to zero. Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 ZZ], with ZZ meaning zero.

bufferBuffer to be loaded (does not need to be aligned on any particular boundary)
Resulting 128 bit value
Template Parameters
tBufferHas16BytesTrue, if the buffer holds at least 16 bytes; False, if the buffer holds only 15 bytes

◆ load_u8_15_upper_zero() [2/2]

__m128i Ocean::CV::SSE::load_u8_15_upper_zero ( const uint8_t *const  buffer)

◆ load_u8_16_and_shift_right()

template<unsigned int tShiftBytes>
__m128i Ocean::CV::SSE::load_u8_16_and_shift_right ( const uint8_t *const  buffer)

Loads 16 bytes from memory which is at least 16 bytes large and shifts the 128i value by a specified number of bytes to the right (by inserting zeros).

This function can be used if the remaining buffer is smaller than 16 bytes while the buffer exceeds/continues in the lower address space (from the original point of interest).
Thus, this function can handle a buffer with the following pattern (with lower address left and high address right):
| ?? ?? ?? ?? ?? ?? ?? ?? ?? V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 |, where ?? represent random values in our buffer (in the lower address space), and VX represent the values of interest and V0 the location to which 'buffer' is pointing to.
by load_u8_16_and_shift_right<6>(buffer - 6);
The resulting 128i register will then be composed of (high bits left, low bits right): [00 00 00 00 00 00 V9 V8 V7 V6 V5 V4 V3 V2 V1 V0].

bufferThe actual address from which the 16 bytes will be loaded, must be valid and must be at least 16 bytes large
The resulting 128 bit value
Template Parameters
tShiftBytesThe number of bytes which will be shifted (to the right) after the memory has been loaded, with range [0, 16]

◆ moveHighBits16_8()

__m128i Ocean::CV::SSE::moveHighBits16_8 ( const __m128i &  value)

Moves the higher 8 bits of eight 16 bit elements to the lower 8 bits and fills the high bits with 0.

Result: 0P0N-0L0J-0H0F-0D0B

valueValue to remove the high bits for

◆ moveHighBits16_8_5()

__m128i Ocean::CV::SSE::moveHighBits16_8_5 ( const __m128i &  value)

Moves the higher 8 bits of five 16 bit elements to the lower 8 bits and fills the high bits with 0.

Result: 0000-000J-0H0F-0D0B

valueValue to remove the high bits for

◆ moveHighBits16_8_6()

__m128i Ocean::CV::SSE::moveHighBits16_8_6 ( const __m128i &  value)

Moves the higher 8 bits of six 16 bit elements to the lower 8 bits and fills the high bits with 0.

Result: 0000-0L0J-0H0F-0D0B

valueValue to remove the high bits for

◆ moveHighBits16_8_7()

__m128i Ocean::CV::SSE::moveHighBits16_8_7 ( const __m128i &  value)

Moves the higher 8 bits of seven 16 bit elements to the lower 8 bits and fills the high bits with 0.

Result: 000N-0L0J-0H0F-0D0B

valueValue to remove the high bits for

◆ moveHighBits32_16()

__m128i Ocean::CV::SSE::moveHighBits32_16 ( const __m128i &  value)

Moves the higher 16 bits of four 32 bit elements to the lower 16 bits and fills the high bits with 0.

Result: 00PO-00LK-00HG-00DC

valueValue to remove the high bits for

◆ moveLowBits16_8ToHigh64()

__m128i Ocean::CV::SSE::moveLowBits16_8ToHigh64 ( const __m128i &  value)

Moves the lower 8 bits of eight 16 bit elements to the higher 64 bits and fills the low 64 bits with 0.

Result: OMKI-GECA-0000-0000

valueValue to remove the high bits for

◆ moveLowBits16_8ToLow64()

__m128i Ocean::CV::SSE::moveLowBits16_8ToLow64 ( const __m128i &  value)

Moves the lower 8 bits of eight 16 bit elements to the lower 64 bits and fills the high 64 bits with 0.

Result: 0000-0000-OMKI-GECA

valueValue to remove the high bits for

◆ moveLowBits32_16ToLow64()

__m128i Ocean::CV::SSE::moveLowBits32_16ToLow64 ( const __m128i &  value)

Moves the lower 16 bits of four 32 bit elements to the lower 64 bits and fills the high 64 bits with 0.

Result: 0000-0000-NMJI-FEBA

valueValue to remove the high bits for

◆ moveLowBits32_8ToLow32()

__m128i Ocean::CV::SSE::moveLowBits32_8ToLow32 ( const __m128i &  value)

Moves the lower 8 bits of four 32 bit elements to the lower 32 bits and fills the high 96 bits with 0.

Result: 0000-0000-0000-MIEA

valueValue to remove the high bits for

◆ multiplyInt8x16ToInt32x8()

OCEAN_FORCE_INLINE void Ocean::CV::SSE::multiplyInt8x16ToInt32x8 ( const __m128i &  values0,
const __m128i &  values1,
__m128i &  products0,
__m128i &  products1 

Multiplies 8 int16_t values with 8 int16_t values and returns the products as 8 int32_t results.

The pseudo code of the function is as follows:

products0[0] = values0[0] * values1[0]
...
products0[3] = values0[3] * values1[3]

products1[0] = values0[4] * values1[4]
...
products1[3] = values0[7] * values1[7]
values0The first 8 int16_t values to be multiplied
values1The second 8 int16_t values to be multiplied
products0The resulting first 4 int32_t products
products1The resulting second 4 int32_t products

◆ multiplyInt8x16ToInt32x8AndAccumulate()

OCEAN_FORCE_INLINE void Ocean::CV::SSE::multiplyInt8x16ToInt32x8AndAccumulate ( const __m128i &  values0,
const __m128i &  values1,
__m128i &  results0,
__m128i &  results1 

Multiplies 8 int16_t values with 8 int16_t values and adds the products to 8 int32_t values.

The pseudo code of the function is as follows:

results0[0] += values0[0] * values1[0]
...
results0[3] += values0[3] * values1[3]

results1[0] += values0[4] * values1[4]
...
results1[3] += values0[7] * values1[7]
values0The first 8 int16_t values to be multiplied
values1The second 8 int16_t values to be multiplied
results0The results to which the first 4 int32_t products will be added
results1The results to which the second 4 int32_t products will be added

◆ prefetchNTA()

void Ocean::CV::SSE::prefetchNTA ( const void *const  data)

Prefetches a block of non-temporal memory into non-temporal cache structure.

dataData to be prefetched

◆ prefetchT0()

void Ocean::CV::SSE::prefetchT0 ( const void *const  data)

Prefetches a block of temporal memory into all cache levels.

dataData to be prefetched

◆ prefetchT1()

void Ocean::CV::SSE::prefetchT1 ( const void *const  data)

Prefetches a block of temporal memory in all cache levels except 0th cache level.

dataData to be prefetched

◆ prefetchT2()

void Ocean::CV::SSE::prefetchT2 ( const void *const  data)

Prefetches a block of temporal memory in all cache levels, except 0th and 1st cache levels.

dataData to be prefetched

◆ removeHighBits16_8()

__m128i Ocean::CV::SSE::removeHighBits16_8 ( const __m128i &  value)

Removes the higher 8 bits of eight 16 bit elements.

Result: 0O0M-0K0I-0G0E-0C0A

valueValue to remove the high bits for

◆ removeHighBits16_8_7_lower()

__m128i Ocean::CV::SSE::removeHighBits16_8_7_lower ( const __m128i &  value)

Removes the higher 8 bits of eight 16 bit elements and sets the upper two bytes to zero.

Result: 000M-0K0I-0G0E-0C0A

valueValue to remove the high bits for

◆ removeHighBits16_8_7_upper()

__m128i Ocean::CV::SSE::removeHighBits16_8_7_upper ( const __m128i &  value)

Removes the higher 8 bits of eight 16 bit elements and sets the lower two bytes to zero.

Result: 0O0M-0K0I-0G0E-0C00

valueValue to remove the high bits for

◆ removeHighBits32_16()

__m128i Ocean::CV::SSE::removeHighBits32_16 ( const __m128i &  value)

Removes the higher 16 bits of four 32 bit elements.

Result: 00NM-00JI-00FE-00BA

valueValue to remove the high bits for

◆ removeLowBits32_16()

__m128i Ocean::CV::SSE::removeLowBits32_16 ( const __m128i &  value)

Removes the lower 16 bits of four 32 bit elements.

Result: PO00-LK00-HG00-DC00

valueValue to remove the lower bits for

◆ reverseChannelOrder2Channel8Bit32Elements()

OCEAN_FORCE_INLINE void Ocean::CV::SSE::reverseChannelOrder2Channel8Bit32Elements ( const uint8_t *  interleaved,
uint8_t *  reversedInterleaved 

Reverses the order of the channels of 16 pixels (32 elements) of an image with 2 interleaved channels and 8 bit per element (e.g., YA16 to AY16).

interleaved32 elements (16 pixels) of an image with 2 channels and 8 bit per element (32 bytes)
reversedInterleavedResulting 32 elements with reversed channel order

◆ reverseChannelOrder3Channel8Bit48Elements() [1/3]

OCEAN_FORCE_INLINE void Ocean::CV::SSE::reverseChannelOrder3Channel8Bit48Elements ( const __m128i &  interleaved0,
const __m128i &  interleaved1,
const __m128i &  interleaved2,
__m128i &  reversedInterleaved0,
__m128i &  reversedInterleaved1,
__m128i &  reversedInterleaved2 

Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channels and 8 bit per element.

interleaved0First 16 elements holding the interleaved image data
interleaved1Second 16 elements holding the interleaved image data
interleaved2Third 16 elements holding the interleaved image data
reversedInterleaved0Resulting first 16 elements holding the interleaved image data with reversed channel order
reversedInterleaved1Resulting second 16 elements holding the interleaved image data with reversed channel order
reversedInterleaved2Resulting third 16 elements holding the interleaved image data with reversed channel order

◆ reverseChannelOrder3Channel8Bit48Elements() [2/3]

OCEAN_FORCE_INLINE void Ocean::CV::SSE::reverseChannelOrder3Channel8Bit48Elements ( const uint8_t *  interleaved,
uint8_t *  reversedInterleaved 

Reverses the order of the first and last channel of 48 elements (16 pixels) of an image with 3 interleaved channels and 8 bit per element (e.g., RGB24 to BGR24).

interleaved48 elements of an image with 3 channels and 8 bit per element (48 bytes)
reversedInterleavedResulting 48 elements with reversed channel order

◆ reverseChannelOrder3Channel8Bit48Elements() [3/3]

void Ocean::CV::SSE::reverseChannelOrder3Channel8Bit48Elements ( uint8_t *  interleaved)

Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channels and 8 bit per element (in place).

interleaved48 elements of an image with 3 channels and 8 bit per element (48 bytes)

◆ reverseChannelOrder4Channel8Bit64Elements()

OCEAN_FORCE_INLINE void Ocean::CV::SSE::reverseChannelOrder4Channel8Bit64Elements ( const uint8_t *  interleaved,
uint8_t *  reversedInterleaved 

Reverses the order of the channels of 16 pixels (64 elements) of an image with 4 interleaved channels and 8 bit per element (e.g., RGBA32 to ABGR32).

interleaved64 elements of an image with 4 channels and 8 bit per element (64 bytes)
reversedInterleavedResulting 64 elements with reversed channel order

◆ reverseElements8Bit48Elements() [1/3]

void Ocean::CV::SSE::reverseElements8Bit48Elements ( const __m128i &  elements0,
const __m128i &  elements1,
const __m128i &  elements2,
__m128i &  reversedElements0,
__m128i &  reversedElements1,
__m128i &  reversedElements2 

Reverses the order of 48 elements with 8 bit per element.

elements0First 16 elements
elements1Second 16 elements
elements2Third 16 elements
reversedElements0Resulting reversed first 16 elements
reversedElements1Resulting reversed second 16 elements
reversedElements2Resulting reversed third 16 elements

◆ reverseElements8Bit48Elements() [2/3]

void Ocean::CV::SSE::reverseElements8Bit48Elements ( const uint8_t *  elements,
uint8_t *  reversedElements 

Reverses the order of 48 elements with 8 bit per element.

elements48 elements that will be reversed
reversedElementsResulting reversed 48 elements

◆ reverseElements8Bit48Elements() [3/3]

void Ocean::CV::SSE::reverseElements8Bit48Elements ( uint8_t *  elements)

Reverses the order of 48 elements with 8 bit per element (in place).

elements48 elements that will be reversed

◆ set128i()

__m128i Ocean::CV::SSE::set128i ( const unsigned long long  high64,
const unsigned long long  low64 

Sets a 128i value by two 64 bit values.

high64High 64 bits to be set
low64Low 64 bits to be set
Resulting 128i value

◆ shiftAndMirrorChannelToBack4Channel32Bit()

void Ocean::CV::SSE::shiftAndMirrorChannelToBack4Channel32Bit ( const uint8_t *  elements,
uint8_t *  shiftedElements 

Shifts the channels of a 4 channel 32 bit pixels to the back and moves the back channel to the front channel and mirrors the four individual pixels.

elements16 elements of 4 pixels to be shifted and mirrored
shiftedElementsResulting shifted and mirrored elements

◆ shiftAndMirrorChannelToFront4Channel32Bit()

void Ocean::CV::SSE::shiftAndMirrorChannelToFront4Channel32Bit ( const uint8_t *  elements,
uint8_t *  shiftedElements 

Shifts the channels of a 4 channel 32 bit pixels to the front and moves the front channel to the back channel and mirrors the four individual pixels.

elements16 elements of 4 pixels to be shifted and mirrored
shiftedElementsResulting shifted and mirrored elements

◆ shiftChannelToBack4Channel32Bit()

void Ocean::CV::SSE::shiftChannelToBack4Channel32Bit ( const uint8_t *  elements,
uint8_t *  shiftedElements 

Shifts the channels of a 4 channel 32 bit pixels to the back and moves the back channel to the front channel.

The function takes four pixels DCBA DCBA DCBA DCBA and provides CBAD CBAD CBAD CBAD.

elements16 elements of 4 pixels to be shifted
shiftedElementsResulting shifted elements

◆ shiftChannelToFront4Channel32Bit()

void Ocean::CV::SSE::shiftChannelToFront4Channel32Bit ( const uint8_t *  elements,
uint8_t *  shiftedElements 

Shifts the channels of a 4 channel 32 bit pixels to the front and moves the front channel to the back channel.

The function takes four pixels DCBA DCBA DCBA DCBA and provides ADCB ADCB ADCB ADCB.

elements16 elements of 4 pixels to be shifted
shiftedElementsResulting shifted elements

◆ shuffleLow32ToLow32_8()

__m128i Ocean::CV::SSE::shuffleLow32ToLow32_8 ( const __m128i &  value)

Shuffles the lower four 8 bits to the low 8 bits of four 32 bit elements.

Result: 000D-000C-000B-000A

valueValue to be shuffled

◆ shuffleNeighbor2High64BitsToLow16_8()

__m128i Ocean::CV::SSE::shuffleNeighbor2High64BitsToLow16_8 ( const __m128i &  value)

Shuffles pairs of two neighbors of the high 64 bits to the low 8 bits of eight 16 bit elements.

valueValue to be shuffled

◆ shuffleNeighbor2Low64BitsToLow16_8()

__m128i Ocean::CV::SSE::shuffleNeighbor2Low64BitsToLow16_8 ( const __m128i &  value)

Shuffles pairs of two neighbors of the low 64 bits to the low 8 bits of eight 16 bit elements.

valueValue to be shuffled

◆ shuffleNeighbor4High64BitsToLow16_8()

__m128i Ocean::CV::SSE::shuffleNeighbor4High64BitsToLow16_8 ( const __m128i &  value)

Shuffles pairs of four neighbors of the high 64 bits to the low 8 bits of eight 16 bit elements.

Result: 0P0L-0O0K-0N0J-0M0I

valueValue to be shuffled

◆ shuffleNeighbor4Low64BitsToLow16_8()

__m128i Ocean::CV::SSE::shuffleNeighbor4Low64BitsToLow16_8 ( const __m128i &  value)

Shuffles pairs of four neighbors of the low 64 bits to the low 8 bits of eight 16 bit elements.

Result: 0H0D-0G0C-0F0B-0E0A

valueValue to be shuffled

◆ ssd2Channel16Bit1x1() [1/2]

unsigned int Ocean::CV::SSE::ssd2Channel16Bit1x1 ( const uint8_t *const  pixel0,
const uint8_t *const  pixel1,
const unsigned int  size0,
const unsigned int  size1,
const unsigned int  f0x_y_,
const unsigned int  f0xy_,
const unsigned int  f0x_y,
const unsigned int  f0xy,
const unsigned int  f1x_y_,
const unsigned int  f1xy_,
const unsigned int  f1x_y,
const unsigned int  f1xy 

Returns the interpolated sum of square difference for one 2 channel 16 bit pixel.

pixel0Upper left pixel in the first frame
pixel1Upper left pixel in the second frame
size0Size of one frame row in bytes
size1Size of one frame row in bytes
f0x_y_Product of the inverse fx and the inverse fy interpolation factor for the first image
f0xy_Product of the fx and the inverse fy interpolation factor for the first image
f0x_yProduct of the inverse fx and the fy interpolation factor for the first image
f0xyProduct of the fx and the fy interpolation factor for the first image
f1x_y_Product of the inverse fx and the inverse fy interpolation factor for the second image
f1xy_Product of the fx and the inverse fy interpolation factor for the second image
f1x_yProduct of the inverse fx and the fy interpolation factor for the second image
f1xyProduct of the fx and the fy interpolation factor for the second image
Interpolated sum of square difference

◆ ssd2Channel16Bit1x1() [2/2]

unsigned int Ocean::CV::SSE::ssd2Channel16Bit1x1 ( const uint8_t *const  pixel0,
const uint8_t *const  pixel1,
const unsigned int  size0,
const unsigned int  size1,
const unsigned int  f1x_y_,
const unsigned int  f1xy_,
const unsigned int  f1x_y,
const unsigned int  f1xy 

Returns the interpolated sum of square difference for one 2 channel 16 bit pixel.

pixel0Upper left pixel in the first frame
pixel1Upper left pixel in the second frame
size0Size of one frame row in bytes
size1Size of one frame row in bytes
f1x_y_Product of the inverse fx and the inverse fy interpolation factor for the second image
f1xy_Product of the fx and the inverse fy interpolation factor for the second image
f1x_yProduct of the inverse fx and the fy interpolation factor for the second image
f1xyProduct of the fx and the fy interpolation factor for the second image
Interpolated sum of square difference

◆ store128i()

void Ocean::CV::SSE::store128i ( const __m128i &  value,
uint8_t *const  buffer 

Stores a 128i value to the memory.

valueValue to be stored
bufferBuffer receiving the value (does not need to be aligned on any particular boundary)

◆ sum1Channel8Bit16Elements() [1/2]

__m128i Ocean::CV::SSE::sum1Channel8Bit16Elements ( const __m128i &  elements)

Sums 16 elements with 8 bit per element.

The results are stored as first 32 bit integer value (high bits left, low bits right): ???? ???? ???? 0000.

elements16 elements holding the image data
Resulting sums

◆ sum1Channel8Bit16Elements() [2/2]

__m128i Ocean::CV::SSE::sum1Channel8Bit16Elements ( const uint8_t *  elements)

Sums 16 elements with 8 bit per element.

The results are stored as first 32 bit integer value (high bits left, low bits right): ???? ???? ???? 0000.

elements16 elements holding the image data
Resulting sums

◆ sum1Channel8BitBack15Elements()

__m128i Ocean::CV::SSE::sum1Channel8BitBack15Elements ( const uint8_t *  elements)

Sums the last 15 elements of a 16 elements buffer with 8 bit per element, the beginning 1 element is interpreted as zero.

However, the provided buffer must be at least 16 bytes large as the entire 16 bytes will be loaded to the SSE register.
Thus, this function handles one buffer with this pattern (while the memory starts left and ends right): [NA 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15]. The results are stored as first 32 bit integer value (high bits left, low bits right): ???? ???? ???? 0000.

elements(1+) 15 elements holding the image data
Resulting sum

◆ sum1Channel8BitFront15Elements()

template<bool tBufferHas16Bytes>
__m128i Ocean::CV::SSE::sum1Channel8BitFront15Elements ( const uint8_t *  elements)

Sums the first 15 elements of a buffer with 8 bit per element.

This function supports to load the 15 elements from a buffer with only 15 bytes or with a buffer with at least 16 bytes.
If the provided buffer holds at least 16 bytes the load function is much faster compared to the case if the buffer is not larger than 15 bytes.
The results are stored as first 32 bit integer value (high bits left, low bits right): ???? ???? ???? 0000.

elements16 elements holding the image data
Resulting sums
Template Parameters
tBufferHas16BytesTrue, if the buffer holds at least 16 bytes; False, if the buffer holds only 15 bytes

◆ sum_f32_4()

OCEAN_FORCE_INLINE float Ocean::CV::SSE::sum_f32_4 ( const __m128 &  value)

Adds the four (all four) individual 32 bit float of a m128 value and returns the result.

valueThe value which elements will be added
The resulting sum value

◆ sum_f64_2()

OCEAN_FORCE_INLINE double Ocean::CV::SSE::sum_f64_2 ( const __m128d &  value)

Adds the two (all two) individual 64 bit float values of a m128d value and returns the result.

valueThe value which elements will be added
The resulting sum value

◆ sum_u32_4()

OCEAN_FORCE_INLINE unsigned int Ocean::CV::SSE::sum_u32_4 ( const __m128i &  value)

Adds the four (all four) individual 32 bit unsigned integer values of a m128i value and returns the result.

valueThe value which elements will be added
The resulting sum value

◆ sum_u32_first_2()

unsigned int Ocean::CV::SSE::sum_u32_first_2 ( const __m128i &  value)

Adds the first two individual 32 bit unsigned integer values of a m128i value and returns the result.

valueThe value which elements will be added
The resulting sum value

◆ sum_u32_first_third()

unsigned int Ocean::CV::SSE::sum_u32_first_third ( const __m128i &  value)

Adds the first and the third 32 bit unsigned integer values of a m128i value and returns the result.

valueThe value which elements will be added
The resulting sum value

◆ sumAbsoluteDifferences8Bit16Elements()

__m128i Ocean::CV::SSE::sumAbsoluteDifferences8Bit16Elements ( const uint8_t *const  image0,
const uint8_t *const  image1 

Sum absolute differences determination for 16 elements of an 16 elements buffer with 8 bit precision.

image0First 16 elements to determine the sad for, may be non aligned
image1Second 16 elements to determine the sad for, may be non aligned
SAD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumAbsoluteDifferences8BitBack11Elements()

__m128i Ocean::CV::SSE::sumAbsoluteDifferences8BitBack11Elements ( const uint8_t *const  image0,
const uint8_t *const  image1 

Sum absolute differences determination for the last 11 elements of a 16 elements buffer with 8 bit precision.

image0First 11 elements to determine the sad for, may be non aligned
image1Second 11 elements to determine the sad for, may be non aligned
SAD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumAbsoluteDifferences8BitFront10Elements()

template<bool tBufferHas16Bytes>
__m128i Ocean::CV::SSE::sumAbsoluteDifferences8BitFront10Elements ( const uint8_t *const  image0,
const uint8_t *const  image1 

Sum absolute differences determination for the first 10 elements of a buffer with 8 bit precision.

This function supports to load the 10 elements from a buffer with only 10 bytes or with a buffer with at least 16 bytes.

image0First 10 elements to determine the sad for, may be non aligned
image1Second 10 elements to determine the sad for, may be non aligned
SAD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])
Template Parameters
tBufferHas16BytesTrue, if the buffer holds at least 16 bytes; False, if the buffer holds 10 bytes only

◆ sumAbsoluteDifferences8BitFront15Elements()

template<bool tBufferHas16Bytes>
__m128i Ocean::CV::SSE::sumAbsoluteDifferences8BitFront15Elements ( const uint8_t *const  image0,
const uint8_t *const  image1 

Sum absolute differences determination for the first 15 elements of a buffer with 8 bit precision.

This function supports to load the 15 elements from a buffer with only 15 bytes or with a buffer with at least 16 bytes.

image0First 15 elements to determine the sad for, may be non aligned
image1Second 15 elements to determine the sad for, may be non aligned
SAD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])
Template Parameters
tBufferHas16BytesTrue, if the buffer holds at least 16 bytes; False, if the buffer holds 15 bytes only

◆ sumInterleave3Channel8Bit45Elements()

__m128i Ocean::CV::SSE::sumInterleave3Channel8Bit45Elements ( const uint8_t *  interleaved)

Sums 15 pixels (45 elements) individually per channel for an interleaved pixel format with 3 channels and 8 bit per channel and element.

The results are stored in three 32 bit integer values (high bits left, low bits right): ???? 2222 1111 0000.

interleaved45 elements holding the interleaved image data
Resulting sums

◆ sumInterleave3Channel8Bit48Elements() [1/2]

__m128i Ocean::CV::SSE::sumInterleave3Channel8Bit48Elements ( const __m128i &  interleaved0,
const __m128i &  interleaved1,
const __m128i &  interleaved2 

Sums 16 pixels (48 elements) individually per channel for an interleaved pixel format with 3 channels and 8 bit per channel and element.

The results are stored in three 32 bit integer values (high bits left, low bits right): ???? 2222 1111 0000.

interleaved0First 16 elements holding the interleaved image data
interleaved1Second 16 elements holding the interleaved image data
interleaved2Third 16 elements holding the interleaved image data
Resulting sums

◆ sumInterleave3Channel8Bit48Elements() [2/2]

__m128i Ocean::CV::SSE::sumInterleave3Channel8Bit48Elements ( const uint8_t *  interleaved)

Sums 16 pixels (48 elements) individually per channel for an interleaved pixel format with 3 channels and 8 bit per channel and element.

The results are stored in three 32 bit integer values (high bits left, low bits right): ???? 2222 1111 0000.

interleaved48 elements holding the interleaved image data
Resulting sums

◆ sumSquareDifference8Bit16Elements() [1/2]

__m128i Ocean::CV::SSE::sumSquareDifference8Bit16Elements ( const __m128i &  row0,
const __m128i &  row1 

Sum square difference determination for 16 elements with 8 bit precision.

row0First 16 elements to determine the ssd for
row1Second 16 elements to determine the ssd for
SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumSquareDifference8Bit16Elements() [2/2]

__m128i Ocean::CV::SSE::sumSquareDifference8Bit16Elements ( const uint8_t *const  image0,
const uint8_t *const  image1 

Sum square difference determination for 16 elements with 8 bit precision.

image0First 16 elements to determine the ssd for, may be non aligned
image1Second 16 elements to determine the ssd for, may be non aligned
SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumSquareDifference8Bit16ElementsAligned16()

__m128i Ocean::CV::SSE::sumSquareDifference8Bit16ElementsAligned16 ( const uint8_t *const  image0,
const uint8_t *const  image1 

Sum square difference determination for 16 elements with 8 bit precision.

image0First 16 elements to determine the ssd for, must be aligned to 16 bytes
image1Second 16 elements to determine the ssd for, must be aligned to 16 bytes
SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumSquareDifference8BitBack12Elements()

__m128i Ocean::CV::SSE::sumSquareDifference8BitBack12Elements ( const uint8_t *const  image0,
const uint8_t *const  image1 

Sum square difference determination for the last 12 elements of an 16 elements buffer with 8 bit precision, the beginning 4 elements are interpreted as zero.

However, the provided buffers must be at least 16 bytes large as the entire 16 bytes will be loaded to the SSE registers.
Thus, this function handles two buffers with this pattern (while the memory starts left and ends right): [NA NA NA NA 04 05 06 07 08 09 10 11 12 13 14 15].

image0First (4+) 12 elements to determine the ssd for, with any alignment
image1Second (4+) 12 elements to determine the ssd for, with any alignment
SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumSquareDifference8BitBack13Elements()

__m128i Ocean::CV::SSE::sumSquareDifference8BitBack13Elements ( const uint8_t *const  image0,
const uint8_t *const  image1 

Sum square difference determination for the last 13 elements of an 16 elements buffer with 8 bit precision, the beginning 3 elements are interpreted as zero.

However, the provided buffers must be at least 16 bytes large as the entire 16 bytes will be loaded to the SSE registers.
Thus, this function handles two buffers with this pattern (while the memory starts left and ends right): [NA NA NA 03 04 05 06 07 08 09 10 11 12 13 14 15].

image0First (3+) 13 elements to determine the ssd for, may be non aligned
image1Second (3+) 13 elements to determine the ssd for, may be non aligned
SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumSquareDifference8BitFront12Elements()

__m128i Ocean::CV::SSE::sumSquareDifference8BitFront12Elements ( const uint8_t *const  image0,
const uint8_t *const  image1 

Sum square difference determination for the first 12 elements of an 16 elements buffer with 8 bit precision, the remaining 4 elements are set to zero.

However, the provided buffers must be at least 16 bytes large as the entire 16 bytes will be loaded to the SSE registers.
Thus, this function handles two buffers with this pattern (while the memory starts left and ends right): [00 01 02 03 04 05 06 07 08 09 10 11 NA NA NA NA].

image0First 12 (+4) elements to determine the ssd for, with any alignment
image1Second 12 (+4) elements to determine the ssd for, with any alignment
SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ sumSquareDifference8BitFront13Elements()

template<bool tBufferHas16Bytes>
__m128i Ocean::CV::SSE::sumSquareDifference8BitFront13Elements ( const uint8_t *const  image0,
const uint8_t *const  image1 

Sum square difference determination for the first 13 elements of a buffer with 8 bit precision.

This function supports to load the 13 elements from a buffer with only 13 bytes or with a buffer with at least 16 bytes.

image0First 13 elements to determine the ssd for, may be non aligned
image1Second 13 elements to determine the ssd for, may be non aligned
SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])
Template Parameters
tBufferHas16BytesTrue, if the buffer holds at least 16 bytes; False, if the buffer holds 13 bytes only

◆ sumSquareDifference8BitFront15Elements()

template<bool tBufferHas16Bytes>
__m128i Ocean::CV::SSE::sumSquareDifference8BitFront15Elements ( const uint8_t *const  image0,
const uint8_t *const  image1 

Sum square difference determination for the first 15 elements of a buffer with 8 bit precision.

This function supports to load the 15 elements from a buffer with only 15 bytes or with a buffer with at least 16 bytes.

image0First 15 elements to determine the ssd for, may be non aligned
image1Second 15 elements to determine the ssd for, may be non aligned
SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])
Template Parameters
tBufferHas16BytesTrue, if the buffer holds at least 16 bytes; False, if the buffer holds 15 bytes only

◆ sumSquareDifferences8BitBack11Elements()

__m128i Ocean::CV::SSE::sumSquareDifferences8BitBack11Elements ( const uint8_t *const  image0,
const uint8_t *const  image1 

Sum square differences determination for the last 11 elements of an 16 elements buffer with 8 bit precision.

image0First 11 elements to determine the ssd for, may be non aligned
image1Second 11 elements to determine the ssd for, may be non aligned
SSD result distributed over four terms of the sum, thus result is (m128i_u32[0] + m128i_u32[1] + m128i_u32[2] + m128i_u32[3])

◆ swapReversedChannelOrder3Channel8Bit48Elements()

void Ocean::CV::SSE::swapReversedChannelOrder3Channel8Bit48Elements ( uint8_t *  first,
uint8_t *  second 

Reverses the order of the first and last channel of two sets of 48 elements of an image with 3 interleaved channels and 8 bit per element and further swaps both sets.

firstFirst 48 elements of an image with 3 channels and 8 bit per element (48 bytes)
secondSecond 48 elements of an image with 3 channels and 8 bit per element (48 bytes)

◆ swapReversedElements8Bit48Elements()

void Ocean::CV::SSE::swapReversedElements8Bit48Elements ( uint8_t *  first,
uint8_t *  second 

Reverses the order of two sets of 48 elements with 8 bit per element and further swaps both sets.

firstFirst 48 elements that will be reversed and swapped with the second 48 elements
secondSecond 48 elements that will be reversed and swapped with the first 48 elements

◆ value_u16()

template<unsigned int tIndex>
uint16_t Ocean::CV::SSE::value_u16 ( const __m128i &  value)

Returns one specific 16 bit unsigned integer value of a m128i value object.

valueThe value from which the 16 bit value will be returned
The requested 16 bit value
Template Parameters
tIndexThe index of the requested 16 bit integer value, with range [0, 7]

◆ value_u32()

template<unsigned int tIndex>
unsigned int Ocean::CV::SSE::value_u32 ( const __m128i &  value)

Returns one specific 32 bit unsigned integer value of a m128i value object.

valueThe value from which the 32 bit value will be returned
The requested 32 bit value
Template Parameters
tIndexThe index of the requested 32 bit integer value, with range [0, 3]

◆ value_u8() [1/2]

template<unsigned int tIndex>
uint8_t Ocean::CV::SSE::value_u8 ( const __m128i &  value)

Returns one specific 8 bit unsigned integer value of a m128i value object.

valueThe value from which the 8 bit value will be returned
The requested 8 bit value
Template Parameters
tIndexThe index of the requested 8 bit integer value, with range [0, 15]

◆ value_u8() [2/2]

uint8_t Ocean::CV::SSE::value_u8 ( const __m128i &  value,
const unsigned int  index 

Returns one specific 8 bit unsigned integer value of a m128i value object.

valueThe value from which the 8 bit value will be returned
indexThe index of the requested 8 bit integer value, with range [0, 15]
The requested 8 bit value

The documentation for this class was generated from the following file: