Ocean
|
This class implements computer vision functions using SSE extensions. More...
Data Structures | |
union | M128 |
This union defines a wrapper for the __m128 SSE intrinsic data type. More... | |
union | M128d |
This union defines a wrapper for the __m128d SSE intrinsic data type. More... | |
union | M128i |
This union defines a wrapper for the __m128i SSE intrinsic data type. More... | |
Static Public Member Functions | |
static void | prefetchT0 (const void *const data) |
Prefetches a block of temporal memory into all cache levels. More... | |
static void | prefetchT1 (const void *const data) |
Prefetches a block of temporal memory in all cache levels except 0th cache level. More... | |
static void | prefetchT2 (const void *const data) |
Prefetches a block of temporal memory in all cache levels, except 0th and 1st cache levels. More... | |
static void | prefetchNTA (const void *const data) |
Prefetches a block of non-temporal memory into non-temporal cache structure. More... | |
template<unsigned int tIndex> | |
static uint8_t | value_u8 (const __m128i &value) |
Returns one specific 8 bit unsigned integer value of a m128i value object. More... | |
static uint8_t | value_u8 (const __m128i &value, const unsigned int index) |
Returns one specific 8 bit unsigned integer value of a m128i value object. More... | |
template<unsigned int tIndex> | |
static uint16_t | value_u16 (const __m128i &value) |
Returns one specific 16 bit unsigned integer value of a m128i value object. More... | |
template<unsigned int tIndex> | |
static unsigned int | value_u32 (const __m128i &value) |
Returns one specific 32 bit unsigned integer value of a m128i value object. More... | |
static OCEAN_FORCE_INLINE unsigned int | sum_u32_4 (const __m128i &value) |
Adds the four (all four) individual 32 bit unsigned integer values of a m128i value and returns the result. More... | |
static unsigned int | sum_u32_first_2 (const __m128i &value) |
Adds the first two individual 32 bit unsigned integer values of a m128i value and returns the result. More... | |
static unsigned int | sum_u32_first_third (const __m128i &value) |
Adds the first and the third 32 bit unsigned integer values of a m128i value and returns the result. More... | |
static OCEAN_FORCE_INLINE float | sum_f32_4 (const __m128 &value) |
Adds the four (all four) individual 32 bit float values of a m128 value and returns the result. More... | |
static OCEAN_FORCE_INLINE double | sum_f64_2 (const __m128d &value) |
Adds the two (all two) individual 64 bit float values of a m128d value and returns the result. More... | |
static __m128i | sumSquareDifferences8BitBack11Elements (const uint8_t *const image0, const uint8_t *const image1) |
Sum square differences determination for the last 11 elements of a 16 elements buffer with 8 bit precision. More... | |
static __m128i | sumSquareDifference8BitFront12Elements (const uint8_t *const image0, const uint8_t *const image1) |
Sum square difference determination for the first 12 elements of a 16 elements buffer with 8 bit precision, the remaining 4 elements are set to zero. More... | |
static __m128i | sumSquareDifference8BitBack12Elements (const uint8_t *const image0, const uint8_t *const image1) |
Sum square difference determination for the last 12 elements of a 16 elements buffer with 8 bit precision, the beginning 4 elements are interpreted as zero. More... | |
template<bool tBufferHas16Bytes> | |
static __m128i | sumSquareDifference8BitFront13Elements (const uint8_t *const image0, const uint8_t *const image1) |
Sum square difference determination for the first 13 elements of a buffer with 8 bit precision. More... | |
static __m128i | sumSquareDifference8BitBack13Elements (const uint8_t *const image0, const uint8_t *const image1) |
Sum square difference determination for the last 13 elements of a 16 elements buffer with 8 bit precision, the beginning 3 elements are interpreted as zero. More... | |
template<bool tBufferHas16Bytes> | |
static __m128i | sumSquareDifference8BitFront15Elements (const uint8_t *const image0, const uint8_t *const image1) |
Sum square difference determination for the first 15 elements of a buffer with 8 bit precision. More... | |
static __m128i | sumSquareDifference8Bit16Elements (const uint8_t *const image0, const uint8_t *const image1) |
Sum square difference determination for 16 elements with 8 bit precision. More... | |
static __m128i | sumSquareDifference8Bit16ElementsAligned16 (const uint8_t *const image0, const uint8_t *const image1) |
Sum square difference determination for 16 elements with 8 bit precision. More... | |
static __m128i | sumSquareDifference8Bit16Elements (const __m128i &row0, const __m128i &row1) |
Sum square difference determination for 16 elements with 8 bit precision. More... | |
static void | average8Elements1Channel32Bit2x2 (const float *const image0, const float *const image1, float *const result) |
Averages 8 elements of 2x2 blocks for 1 channel 32 bit frames. More... | |
static void | average8Elements1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result) |
Averages 8 elements of 2x2 blocks for 1 channel 8 bit frames. More... | |
static void | average8ElementsBinary1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result, const uint16_t threshold=776u) |
Averages 8 elements of 2x2 blocks for 1 binary (0x00 or 0xFF) frames. More... | |
static void | average16Elements1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result) |
Averages 16 elements of 2x2 blocks for 1 channel 8 bit frames. More... | |
static void | average16ElementsBinary1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result, const uint16_t threshold=776u) |
Averages 16 elements of 2x2 blocks for 1 binary (0x00 or 0xFF) frames. More... | |
static void | average32Elements1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result) |
Averages 32 elements of 2x2 blocks for 1 channel 8 bit frames. More... | |
static void | average32ElementsBinary1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result, const uint16_t threshold=776u) |
Averages 32 elements of 2x2 blocks for 1 binary (0x00 or 0xFF) frames. More... | |
static void | average8Elements2Channel16Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result) |
Averages 8 elements of 2x2 blocks for 2 channel 16 bit frames. More... | |
static void | average8Elements2Channel64Bit2x2 (const float *const image0, const float *const image1, float *const result) |
Averages 8 elements of 2x2 blocks for 2 channel 64 bit frames. More... | |
static void | average16Elements2Channel16Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result) |
Averages 16 elements of 2x2 blocks for 2 channel 16 bit frames. More... | |
static void | average32Elements2Channel16Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result) |
Averages 32 elements of 2x2 blocks for 2 channel 16 bit frames. More... | |
static void | average6Elements3Channel96Bit2x2 (const float *const image0, const float *const image1, float *const result) |
Averages 6 elements of 2x2 blocks for 3 channel 96 bit frames. More... | |
static void | average24Elements3Channel24Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result) |
Averages 24 elements of 2x2 blocks for 3 channel 24 bit frames. More... | |
static void | average8Elements4Channel128Bit2x2 (const float *const image0, const float *const image1, float *const result) |
Averages 8 elements of 2x2 blocks for 4 channel 128 bit frames. More... | |
static void | average16Elements4Channel32Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result) |
Averages 16 elements of 2x2 blocks for 4 channel 32 bit frames. More... | |
static void | average32Elements4Channel32Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result) |
Averages 32 elements of 2x2 blocks for 4 channel 32 bit frames. More... | |
static void | average30Elements1Channel8Bit3x3 (const uint8_t *const image0, const uint8_t *const image1, const uint8_t *const image2, uint8_t *const result) |
Averages 30 elements of 3x3 blocks for 1 channel 8 bit frames. More... | |
static __m128i | addOffsetBeforeRightShiftDivisionByTwoSigned16Bit (const __m128i &value) |
Adds 1 to each signed 16 bit value which is both, negative and odd, so that each value can be right shifted by one bit to allow a correct division by two. More... | |
static __m128i | addOffsetBeforeRightShiftDivisionSigned16Bit (const __m128i &value, const unsigned int rightShifts) |
Adds 2^shifts - 1 to each negative signed 16 bit value, so that each value can be right shifted to allow a correct division by 2^shifts. More... | |
static __m128i | divideByRightShiftSigned16Bit (const __m128i &value, const unsigned int rightShifts) |
Divides eight signed 16 bit values by applying a right shift. More... | |
static __m128i | addOffsetBeforeRightShiftDivisionByTwoSigned32Bit (const __m128i &value) |
Adds 1 to each signed 32 bit value which is both, negative and odd, so that each value can be right shifted by one bit to allow a correct division by two. More... | |
static __m128i | addOffsetBeforeRightShiftDivisionSigned32Bit (const __m128i &value, const unsigned int rightShifts) |
Adds 2^shifts - 1 to each negative signed 32 bit value, so that each value can be right shifted to allow a correct division by 2^shifts. More... | |
static __m128i | divideByRightShiftSigned32Bit (const __m128i &value, const unsigned int rightShifts) |
Divides four signed 32 bit values by applying a right shift. More... | |
static void | gradientHorizontalVertical8Elements1Channel8Bit (const uint8_t *source, int8_t *response, const unsigned int width) |
Determines the horizontal and the vertical gradients for 16 following pixels for a given 1 channel 8 bit frame. More... | |
static void | gradientHorizontalVertical8Elements3Products1Channel8Bit (const uint8_t *source, int16_t *response, const unsigned int width) |
Determines the squared horizontal and vertical gradients and the product of both gradients for 16 following pixels for a given 1 channel 8 bit frame. More... | |
static __m128i | sumAbsoluteDifferences8BitBack11Elements (const uint8_t *const image0, const uint8_t *const image1) |
Sum absolute differences determination for the last 11 elements of a 16 elements buffer with 8 bit precision. More... | |
template<bool tBufferHas16Bytes> | |
static __m128i | sumAbsoluteDifferences8BitFront10Elements (const uint8_t *const image0, const uint8_t *const image1) |
Sum absolute differences determination for the first 10 elements of a buffer with 8 bit precision. More... | |
template<bool tBufferHas16Bytes> | |
static __m128i | sumAbsoluteDifferences8BitFront15Elements (const uint8_t *const image0, const uint8_t *const image1) |
Sum absolute differences determination for the first 15 elements of a buffer with 8 bit precision. More... | |
static __m128i | interpolation1Channel8Bit8Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy) |
Interpolates 8 elements of 2x2 blocks for 1 channel 8 bit frames. More... | |
static __m128i | interpolation2Channel16Bit8Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy) |
Interpolates 8 elements of 2x2 blocks for 2 channel 16 bit frames. More... | |
static __m128i | interpolation3Channel24Bit8Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy) |
Interpolates 8 elements of 2x2 blocks for 3 channel 24 bit frames. More... | |
static __m128i | interpolation1Channel8Bit15Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_fxfy_, const __m128i &fx_fyfxfy) |
Interpolates 15 elements of 2x2 blocks for 1 channel 8 bit frames. More... | |
static __m128i | interpolation3Channel24Bit12Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_fxfy_, const __m128i &fx_fyfxfy) |
Interpolates 12 elements of 2x2 blocks for 3 channel 24 bit frames. More... | |
static __m128i | interpolation4Channel32Bit8Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy) |
Interpolates 8 elements of 2x2 blocks for 4 channel 32 bit frames. More... | |
static __m128i | interpolation4Channel32Bit2x4Elements (const __m128i &values0, const __m128i &values1, const __m128i &fx_fy_, const __m128i &fxfy_, const __m128i &fx_fy, const __m128i &fxfy) |
Interpolates 2x4 elements (two separated blocks of 4 elements) of 2x2 blocks for 4 channel 32 bit frames. More... | |
static unsigned int | ssd2Channel16Bit1x1 (const uint8_t *const pixel0, const uint8_t *const pixel1, const unsigned int size0, const unsigned int size1, const unsigned int f1x_y_, const unsigned int f1xy_, const unsigned int f1x_y, const unsigned int f1xy) |
Returns the interpolated sum of square difference for one 2 channel 16 bit pixel. More... | |
static unsigned int | ssd2Channel16Bit1x1 (const uint8_t *const pixel0, const uint8_t *const pixel1, const unsigned int size0, const unsigned int size1, const unsigned int f0x_y_, const unsigned int f0xy_, const unsigned int f0x_y, const unsigned int f0xy, const unsigned int f1x_y_, const unsigned int f1xy_, const unsigned int f1x_y, const unsigned int f1xy) |
Returns the interpolated sum of square difference for one 2 channel 16 bit pixel. More... | |
static __m128i | sumAbsoluteDifferences8Bit16Elements (const uint8_t *const image0, const uint8_t *const image1) |
Sum absolute differences determination for 16 elements of a 16 elements buffer with 8 bit precision. More... | |
static OCEAN_FORCE_INLINE void | deInterleave3Channel8Bit15Elements (const __m128i &interleaved, __m128i &channel01, __m128i &channel2) |
Deinterleaves 15 elements of e.g., an image with 3 channels and 8 bit per element. More... | |
static OCEAN_FORCE_INLINE void | deInterleave3Channel8Bit24Elements (const __m128i &interleavedA, const __m128i &interleavedB, __m128i &channel01, __m128i &channel2) |
Deinterleaves 24 elements of e.g., an image with 3 channels and 8 bit per element. More... | |
static OCEAN_FORCE_INLINE void | deInterleave3Channel8Bit48Elements (const __m128i &interleavedA, const __m128i &interleavedB, const __m128i &interleavedC, __m128i &channel0, __m128i &channel1, __m128i &channel2) |
Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element. More... | |
static void | deInterleave3Channel8Bit48Elements (const uint8_t *interleaved, __m128i &channel0, __m128i &channel1, __m128i &channel2) |
Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element. More... | |
static void | deInterleave3Channel8Bit48Elements (const uint8_t *interleaved, uint8_t *channel0, uint8_t *channel1, uint8_t *channel2) |
Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element. More... | |
static void | deInterleave3Channel8Bit45Elements (const uint8_t *interleaved, __m128i &channel0, __m128i &channel1, __m128i &channel2) |
Deinterleaves 45 elements of e.g., an image with 3 channels and 8 bit per element. More... | |
static OCEAN_FORCE_INLINE void | interleave3Channel8Bit48Elements (const __m128i &channel0, const __m128i &channel1, const __m128i &channel2, __m128i &interleavedA, __m128i &interleavedB, __m128i &interleavedC) |
Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element. More... | |
static OCEAN_FORCE_INLINE void | interleave3Channel8Bit48Elements (const uint8_t *const channel0, const uint8_t *const channel1, const uint8_t *const channel2, uint8_t *const interleaved) |
Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element. More... | |
static OCEAN_FORCE_INLINE void | reverseChannelOrder2Channel8Bit32Elements (const uint8_t *interleaved, uint8_t *reversedInterleaved) |
Reverses the order of the channels of 16 pixels (32 elements) of an image with 2 interleaved channels and 8 bit per element (e.g., YA16 to AY16). More... | |
static OCEAN_FORCE_INLINE void | reverseChannelOrder3Channel8Bit48Elements (const __m128i &interleaved0, const __m128i &interleaved1, const __m128i &interleaved2, __m128i &reversedInterleaved0, __m128i &reversedInterleaved1, __m128i &reversedInterleaved2) |
Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channels and 8 bit per element. More... | |
static OCEAN_FORCE_INLINE void | reverseChannelOrder3Channel8Bit48Elements (const uint8_t *interleaved, uint8_t *reversedInterleaved) |
Reverses the order of the first and last channel of 48 elements (16 pixels) of an image with 3 interleaved channels and 8 bit per element (e.g., RGB24 to BGR24). More... | |
static OCEAN_FORCE_INLINE void | reverseChannelOrder4Channel8Bit64Elements (const uint8_t *interleaved, uint8_t *reversedInterleaved) |
Reverses the order of the channels of 16 pixels (64 elements) of an image with 4 interleaved channels and 8 bit per element (e.g., RGBA32 to ABGR32). More... | |
static void | reverseChannelOrder3Channel8Bit48Elements (uint8_t *interleaved) |
Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channels and 8 bit per element (in place). More... | |
static void | swapReversedChannelOrder3Channel8Bit48Elements (uint8_t *first, uint8_t *second) |
Reverses the order of the first and last channel of two sets of 48 elements of an image with 3 interleaved channels and 8 bit per element and further swaps both sets. More... | |
static void | reverseElements8Bit48Elements (const __m128i &elements0, const __m128i &elements1, const __m128i &elements2, __m128i &reversedElements0, __m128i &reversedElements1, __m128i &reversedElements2) |
Reverses the order of 48 elements with 8 bit per element. More... | |
static void | reverseElements8Bit48Elements (const uint8_t *elements, uint8_t *reversedElements) |
Reverses the order of 48 elements with 8 bit per element. More... | |
static void | reverseElements8Bit48Elements (uint8_t *elements) |
Reverses the order of 48 elements with 8 bit per element (in place). More... | |
static void | swapReversedElements8Bit48Elements (uint8_t *first, uint8_t *second) |
Reverses the order of two sets of 48 elements with 8 bit per element and further swaps both sets. More... | |
static void | shiftChannelToFront4Channel32Bit (const uint8_t *elements, uint8_t *shiftedElements) |
Shifts the channels of a 4 channel 32 bit pixels to the front and moves the front channel to the back channel. More... | |
static void | shiftAndMirrorChannelToFront4Channel32Bit (const uint8_t *elements, uint8_t *shiftedElements) |
Shifts the channels of a 4 channel 32 bit pixels to the front and moves the front channel to the back channel and mirrors the four individual pixels. More... | |
static void | shiftChannelToBack4Channel32Bit (const uint8_t *elements, uint8_t *shiftedElements) |
Shifts the channels of a 4 channel 32 bit pixels to the back and moves the back channel to the front channel. More... | |
static void | shiftAndMirrorChannelToBack4Channel32Bit (const uint8_t *elements, uint8_t *shiftedElements) |
Shifts the channels of a 4 channel 32 bit pixels to the back and moves the back channel to the front channel and mirrors the four individual pixels. More... | |
static __m128i | sum1Channel8Bit16Elements (const __m128i &elements) |
Sums 16 elements with 8 bit per element. More... | |
static __m128i | sum1Channel8Bit16Elements (const uint8_t *elements) |
Sums 16 elements with 8 bit per element. More... | |
template<bool tBufferHas16Bytes> | |
static __m128i | sum1Channel8BitFront15Elements (const uint8_t *elements) |
Sums the first 15 elements of a buffer with 8 bit per element. More... | |
static __m128i | sum1Channel8BitBack15Elements (const uint8_t *elements) |
Sums the last 15 elements of a 16 elements buffer with 8 bit per element, the beginning 1 element is interpreted as zero. More... | |
static __m128i | sumInterleave3Channel8Bit48Elements (const __m128i &interleaved0, const __m128i &interleaved1, const __m128i &interleaved2) |
Sums 16 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element. More... | |
static __m128i | sumInterleave3Channel8Bit48Elements (const uint8_t *interleaved) |
Sums 16 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element. More... | |
static __m128i | sumInterleave3Channel8Bit45Elements (const uint8_t *interleaved) |
Sums 15 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element. More... | |
static __m128i | load128iLower64 (const void *const buffer) |
Loads the lower 64 bit of a 128i value from the memory. More... | |
static __m128i | load128i (const void *const buffer) |
Loads a 128i value from the memory. More... | |
template<bool tBufferHas16Bytes> | |
static __m128i | load_u8_10_upper_zero (const uint8_t *const buffer) |
Loads 10 bytes from memory, which holds either at least 16 bytes or exactly 10 bytes, to a 128i value and sets the remaining bytes of the resulting 128i value to zero. More... | |
template<bool tBufferHas16Bytes> | |
static __m128i | load_u8_15_upper_zero (const uint8_t *const buffer) |
Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value and sets the remaining byte of the resulting 128i value to zero. More... | |
template<bool tBufferHas16Bytes> | |
static __m128i | load_u8_13_lower_random (const uint8_t *const buffer) |
Loads 13 bytes from memory, which holds either at least 16 bytes or exactly 13 bytes, to a 128i value while the remaining bytes of the resulting 128i value will be random. More... | |
template<bool tBufferHas16Bytes> | |
static __m128i | load_u8_15_lower_zero (const uint8_t *const buffer) |
Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value and sets the remaining byte of the resulting 128i value to zero. More... | |
template<bool tBufferHas16Bytes> | |
static __m128i | load_u8_15_lower_random (const uint8_t *const buffer) |
Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value while the remaining byte of the resulting 128i value will be random. More... | |
template<unsigned int tShiftBytes> | |
static __m128i | load_u8_16_and_shift_right (const uint8_t *const buffer) |
Loads 16 bytes from memory which is at least 16 bytes large and shifts the 128i value by a specified number of bytes to the right (by inserting zeros). More... | |
static void | store128i (const __m128i &value, uint8_t *const buffer) |
Stores a 128i value to the memory. More... | |
static __m128i | set128i (const unsigned long long high64, const unsigned long long low64) |
Sets a 128i value by two 64 bit values. More... | |
static __m128i | removeHighBits32_16 (const __m128i &value) |
Removes the higher 16 bits of four 32 bit elements. More... | |
static __m128i | removeLowBits32_16 (const __m128i &value) |
Removes the lower 16 bits of four 32 bit elements. More... | |
static __m128i | removeHighBits16_8 (const __m128i &value) |
Removes the higher 8 bits of eight 16 bit elements. More... | |
static __m128i | removeHighBits16_8_7_lower (const __m128i &value) |
Removes the higher 8 bits of eight 16 bit elements and sets the upper two bytes to zero. More... | |
static __m128i | removeHighBits16_8_7_upper (const __m128i &value) |
Removes the higher 8 bits of eight 16 bit elements and sets the lower two bytes to zero. More... | |
static __m128i | moveLowBits16_8ToLow64 (const __m128i &value) |
Moves the lower 8 bits of eight 16 bit elements to the lower 64 bits and fills the high 64 bits with 0. More... | |
static __m128i | moveLowBits32_8ToLow32 (const __m128i &value) |
Moves the lower 8 bits of four 32 bit elements to the lower 32 bits and fills the high 96 bits with 0. More... | |
static __m128i | moveLowBits32_16ToLow64 (const __m128i &value) |
Moves the lower 16 bits of four 32 bit elements to the lower 64 bits and fills the high 64 bits with 0. More... | |
static __m128i | moveLowBits16_8ToHigh64 (const __m128i &value) |
Moves the lower 8 bits of eight 16 bit elements to the higher 64 bits and fills the low 64 bits with 0. More... | |
static __m128i | moveHighBits32_16 (const __m128i &value) |
Moves the higher 16 bits of four 32 bit elements to the lower 16 bits and fills the high bits with 0. More... | |
static __m128i | moveHighBits16_8 (const __m128i &value) |
Moves the higher 8 bits of eight 16 bit elements to the lower 8 bits and fills the high bits with 0. More... | |
static __m128i | moveHighBits16_8_5 (const __m128i &value) |
Moves the higher 8 bits of five 16 bit elements to the lower 8 bits and fills the high bits with 0. More... | |
static __m128i | moveHighBits16_8_6 (const __m128i &value) |
Moves the higher 8 bits of six 16 bit elements to the lower 8 bits and fills the high bits with 0. More... | |
static __m128i | moveHighBits16_8_7 (const __m128i &value) |
Moves the higher 8 bits of seven 16 bit elements to the lower 8 bits and fills the high bits with 0. More... | |
static __m128i | shuffleLow32ToLow32_8 (const __m128i &value) |
Shuffles the lower four 8 bits to the low 8 bits of four 32 bit elements. More... | |
static __m128i | shuffleNeighbor4Low64BitsToLow16_8 (const __m128i &value) |
Shuffles pairs of four neighbors of the low 64 bits to the low 8 bits of eight 16 bit elements. More... | |
static __m128i | shuffleNeighbor4High64BitsToLow16_8 (const __m128i &value) |
Shuffles pairs of four neighbors of the high 64 bits to the low 8 bits of eight 16 bit elements. More... | |
static __m128i | shuffleNeighbor2Low64BitsToLow16_8 (const __m128i &value) |
Shuffles pairs of two neighbors of the low 64 bits to the low 8 bits of eight 16 bit elements. More... | |
static __m128i | shuffleNeighbor2High64BitsToLow16_8 (const __m128i &value) |
Shuffles pairs of two neighbors of the high 64 bits to the low 8 bits of eight 16 bit elements. More... | |
static __m128i | bitMaskRemoveHigh16_8 () |
Returns the following 128 bit mask: 0x00FF00FF-00FF00FF-00FF00FF-00FF00FF. More... | |
static __m128i | bitMaskRemoveHigh32_16 () |
Returns the following 128 bit mask: 0x0000FFFF-0000FFFF-0000FFFF-0000FFFF. More... | |
static OCEAN_FORCE_INLINE void | multiplyInt8x16ToInt32x8 (const __m128i &values0, const __m128i &values1, __m128i &products0, __m128i &products1) |
Multiplies 8 int16_t values with 8 int16_t values and returns the products as 8 int32_t results. More... | |
static OCEAN_FORCE_INLINE void | multiplyInt8x16ToInt32x8AndAccumulate (const __m128i &values0, const __m128i &values1, __m128i &results0, __m128i &results1) |
Multiplies 8 int16_t values with 8 int16_t values and adds the products to 8 int32_t values. More... | |
template<> | |
__m128i | load_u8_10_upper_zero (const uint8_t *const buffer) |
template<> | |
__m128i | load_u8_15_upper_zero (const uint8_t *const buffer) |
template<> | |
__m128i | load_u8_13_lower_random (const uint8_t *const buffer) |
template<> | |
__m128i | load_u8_15_lower_zero (const uint8_t *const buffer) |
template<> | |
__m128i | load_u8_15_lower_random (const uint8_t *const buffer) |
Static Private Member Functions | |
static unsigned int | interpolation2Channel16Bit1x1 (const uint8_t *const pixel, const unsigned int size, const unsigned int fx_y_, const unsigned int fxy_, const unsigned int fx_y, const unsigned int fxy) |
Returns the interpolated pixel values for one 2 channel 16 bit pixel. More... | |
This class implements computer vision functions using SSE extensions.
|
inlinestatic |
Adds 1 to each signed 16 bit value which is both, negative and odd, so that each value can be right shifted by one bit to allow a correct division by two.
This function must be invoked before the right shift is applied.
value | The eight signed 16 bit values to be handled |
SSE does not have an intrinsic for integer division, so right bit shift is used instead. Unfortunately, for negative odd integer values v: (v / 2) != (v >> 1) because a right shift rounds towards negative infinity, e.g. -5 / 2 = -2 and -5 >> 1 = -3. As a work-around, an offset of 1 is added to all values that are both, negative and odd.
|
inlinestatic |
Adds 1 to each signed 32 bit value which is both, negative and odd, so that each value can be right shifted by one bit to allow a correct division by two.
This function must be invoked before the right shift is applied.
value | The four signed 32 bit values to be handled |
SSE does not have an intrinsic for integer division, so right bit shift is used instead. Unfortunately, for negative odd integer values v: (v / 2) != (v >> 1) because a right shift rounds towards negative infinity, e.g. -5 / 2 = -2 and -5 >> 1 = -3. As a work-around, an offset of 1 is added to all values that are both, negative and odd.
|
inlinestatic |
Adds 2^shifts - 1 to each negative signed 16 bit value, so that each value can be right shifted to allow a correct division by 2^shifts.
This function must be invoked before the right shift is applied.
value | The eight signed 16 bit values to be handled |
rightShifts | The number of right shifts which needs to be applied, with range [0, 15] |
|
inlinestatic |
Adds 2^shifts - 1 to each negative signed 32 bit value, so that each value can be right shifted to allow a correct division by 2^shifts.
This function must be invoked before the right shift is applied.
value | The four signed 32 bit values to be handled |
rightShifts | The number of right shifts which needs to be applied, with range [0, 31] |
|
inlinestatic |
Averages 16 elements of 2x2 blocks for 1 channel 8 bit frames.
The function takes two rows of 16 elements and returns 8 average elements (8 averaged pixels).
image0 | First row of 16 elements, must be valid |
image1 | Second row of 16 elements, must be valid |
result | Resulting 8 average elements, must be valid |
|
inlinestatic |
Averages 16 elements of 2x2 blocks for 2 channel 16 bit frames.
The function takes two rows of 16 elements and returns 8 average elements (4 averaged pixels, each with 2 channels).
image0 | First row of 16 elements |
image1 | Second row of 16 elements |
result | Resulting 8 average elements |
|
inlinestatic |
Averages 16 elements of 2x2 blocks for 4 channel 32 bit frames.
The function takes two rows of 16 elements and returns 8 average elements (2 averaged pixels, each with 4 channels).
image0 | First row of 16 elements |
image1 | Second row of 16 elements |
result | Resulting 8 average elements |
|
inlinestatic |
Averages 16 elements of 2x2 blocks for 1 binary (0x00 or 0xFF) frames.
The function takes two rows of 16 elements and returns 8 average elements (8 averaged pixels).
image0 | First row of 16 elements, must be valid |
image1 | Second row of 16 elements, must be valid |
result | Resulting 8 average elements, must be valid |
threshold | The minimal sum value of four pixels to result in a mask with value 255, with range [1, 255 * 4] |
|
inlinestatic |
Averages 24 elements of 2x2 blocks for 3 channel 24 bit frames.
The function takes two rows of 24 elements and returns 12 average elements (4 averaged pixels, each with 3 channels).
image0 | First row of 24 elements |
image1 | Second row of 24 elements |
result | Resulting 12 average elements |
|
inlinestatic |
Averages 30 elements of 3x3 blocks for 1 channel 8 bit frames.
The function takes three rows of 30 elements and returns 10 average elements (10 averaged pixels).
image0 | First row of 30 elements |
image1 | Second row of 30 elements |
image2 | Third row of 30 elements |
result | Resulting 10 average elements |
       | 1 2 1 |
1/16 * | 2 4 2 |
       | 1 2 1 |
|
inlinestatic |
Averages 32 elements of 2x2 blocks for 1 channel 8 bit frames.
The function takes two rows of 32 elements and returns 16 average elements (16 averaged pixels).
image0 | First row of 32 elements |
image1 | Second row of 32 elements |
result | Resulting 16 average elements |
|
inlinestatic |
Averages 32 elements of 2x2 blocks for 2 channel 16 bit frames.
The function takes two rows of 32 elements and returns 16 average elements (8 averaged pixels, each with 2 channels).
image0 | First row of 32 elements |
image1 | Second row of 32 elements |
result | Resulting 16 average elements |
|
inlinestatic |
Averages 32 elements of 2x2 blocks for 4 channel 32 bit frames.
The function takes two rows of 32 elements and returns 16 average elements (4 averaged pixels, each with 4 channels).
image0 | First row of 32 elements |
image1 | Second row of 32 elements |
result | Resulting 16 average elements |
|
inlinestatic |
Averages 32 elements of 2x2 blocks for 1 binary (0x00 or 0xFF) frames.
The function takes two rows of 32 elements and returns 16 average elements (16 averaged pixels).
image0 | First row of 32 elements, must be valid |
image1 | Second row of 32 elements, must be valid |
result | Resulting 16 average elements, must be valid |
threshold | The minimal sum value of four pixels to result in a mask with value 255, with range [1, 255 * 4] |
|
inlinestatic |
Averages 6 elements of 2x2 blocks for 3 channel 96 bit frames.
The function takes two rows of 6 elements and returns 3 average elements (1 averaged pixel with 3 channels).
image0 | First row of 6 elements |
image1 | Second row of 6 elements |
result | Resulting 3 average elements |
|
inlinestatic |
Averages 8 elements of 2x2 blocks for 1 channel 32 bit frames.
The function takes two rows of 8 elements and returns 4 average elements (4 averaged pixels).
image0 | First row of 8 elements |
image1 | Second row of 8 elements |
result | Resulting 4 average elements |
|
inlinestatic |
Averages 8 elements of 2x2 blocks for 1 channel 8 bit frames.
The function takes two rows of 8 elements and returns 4 average elements (4 averaged pixels).
image0 | First row of 8 elements |
image1 | Second row of 8 elements |
result | Resulting 4 average elements |
|
inlinestatic |
Averages 8 elements of 2x2 blocks for 2 channel 16 bit frames.
The function takes two rows of 8 elements and returns 4 average elements (2 averaged pixels, each with 2 channels).
image0 | First row of 8 elements |
image1 | Second row of 8 elements |
result | Resulting 4 average elements |
|
inlinestatic |
Averages 8 elements of 2x2 blocks for 2 channel 64 bit frames.
The function takes two rows of 8 elements and returns 4 average elements (2 averaged pixels).
image0 | First row of 8 elements |
image1 | Second row of 8 elements |
result | Resulting 4 average elements |
|
inlinestatic |
Averages 8 elements of 2x2 blocks for 4 channel 128 bit frames.
The function takes two rows of 8 elements and returns 4 average elements (1 averaged pixel).
image0 | First row of 8 elements |
image1 | Second row of 8 elements |
result | Resulting 4 average elements |
|
inlinestatic |
Averages 8 elements of 2x2 blocks for 1 binary (0x00 or 0xFF) frames.
The function takes two rows of 8 elements and returns 4 average elements (4 averaged pixels).
image0 | First row of 8 elements, must be valid |
image1 | Second row of 8 elements, must be valid |
result | Resulting 4 average elements, must be valid |
threshold | The minimal sum value of four pixels to result in a mask with value 255, with range [1, 255 * 4] |
|
inlinestatic |
Returns the following 128 bit mask: 0x00FF00FF-00FF00FF-00FF00FF-00FF00FF.
|
inlinestatic |
Returns the following 128 bit mask: 0x0000FFFF-0000FFFF-0000FFFF-0000FFFF.
|
static |
Deinterleaves 15 elements of e.g., an image with 3 channels and 8 bit per element.
This function converts X CBA CBA CBA CBA CBA to 00000000000CCCCC 000BBBBB000AAAAA.
interleaved | The 15 elements holding the interleaved image data |
channel01 | Resulting first and second channel elements, first 8 elements of the first channel, followed by 8 elements of the second channel |
channel2 | Resulting third channel elements, first 8 elements of the third channel, followed by zeros |
|
static |
Deinterleaves 24 elements of e.g., an image with 3 channels and 8 bit per element.
This function converts XX XXX XXX CBA CBA CB A CBA CBA CBA CBA CBA to 00000000CCCCCCCC BBBBBBBBAAAAAAAA.
interleavedA | First 16 elements holding the interleaved image data |
interleavedB | Second 16 elements holding the interleaved image data, the first 8 elements will be used only |
channel01 | Resulting first and second channel elements, first 8 elements of the first channel, followed by 8 elements of the second channel |
channel2 | Resulting third channel elements, first 8 elements of the third channel, followed by zeros |
|
inlinestatic |
Deinterleaves 45 elements of e.g., an image with 3 channels and 8 bit per element.
interleaved | 45 elements of an image with 3 channels and 8 bit per element (45 bytes), must be valid |
channel0 | Resulting first channel holding all elements corresponding to the first channel consecutively |
channel1 | Resulting second channel holding all elements corresponding to the second channel consecutively |
channel2 | Resulting third channel holding all elements corresponding to the third channel consecutively |
|
static |
Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
This function converts CBA CBA CBA CBA CBA C BA CBA CBA CBA CBA CB A CBA CBA CBA CBA CBA to CCCCCCCCCCCCCCCC BBBBBBBBBBBBBBBB AAAAAAAAAAAAAAAA.
interleavedA | First 16 elements holding the interleaved image data |
interleavedB | Second 16 elements holding the interleaved image data |
interleavedC | Third 16 elements holding the interleaved image data |
channel0 | Resulting first channel holding all elements corresponding to the first channel consecutively |
channel1 | Resulting second channel holding all elements corresponding to the second channel consecutively |
channel2 | Resulting third channel holding all elements corresponding to the third channel consecutively |
|
inlinestatic |
Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
interleaved | 48 elements of an image with 3 channels and 8 bit per element (48 bytes) |
channel0 | Resulting first channel holding all elements corresponding to the first channel consecutively |
channel1 | Resulting second channel holding all elements corresponding to the second channel consecutively |
channel2 | Resulting third channel holding all elements corresponding to the third channel consecutively |
|
inlinestatic |
Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
interleaved | 48 elements of an image with 3 channels and 8 bit per element (48 bytes), must be valid |
channel0 | Resulting first channel holding all elements corresponding to the first channel consecutively, must be valid |
channel1 | Resulting second channel holding all elements corresponding to the second channel consecutively, must be valid |
channel2 | Resulting third channel holding all elements corresponding to the third channel consecutively, must be valid |
|
inlinestatic |
Divides eight signed 16 bit values by applying a right shift.
This is able to determine the correct division result for positive and negative 16 bit values.
value | The eight signed 16 bit values to be handled |
rightShifts | The number of right shifts which needs to be applied, with range [0, 15] |
|
inlinestatic |
Divides eight signed 32 bit values by applying a right shift.
This is able to determine the correct division result for positive and negative 32 bit values.
value | The four signed 32 bit values to be handled |
rightShifts | The number of right shifts which needs to be applied, with range [0, 31] |
|
inlinestatic |
Determines the horizontal and the vertical gradients for 16 following pixels for a given 1 channel 8 bit frame.
The resulting gradients are interleaved and each response is inside the range [-127, 127] as the standard response is divided by two.
source | The source position of the first pixel to determine the gradient for, this pixel must not be a border pixel in the original frame |
response | Resulting gradient responses, first the horizontal response then the vertical response (zipped) for 8 pixels |
width | The width of the original frame in pixel, with range [10, infinity) |
|
inlinestatic |
Determines the squared horizontal and vertical gradients and the product of both gradients for 16 following pixels for a given 1 channel 8 bit frame.
The resulting gradients are interleaved and each response is inside the range [-(127 * 127), 127 * 127] as the standard response is divided by two.
source | The source position of the first pixel to determine the gradient for, this pixel must not be a border pixel in the original frame |
response | Resulting gradient responses, first the horizontal response then the vertical response and afterwards the product of horizontal and vertical response (zipped) for 8 pixels |
width | The width of the original frame in pixel, with range [10, infinity) |
|
static |
Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
This function converts CCCCCCCCCCCCCCCC BBBBBBBBBBBBBBBB AAAAAAAAAAAAAAAA to CBA CBA CBA CBA CBA C BA CBA CBA CBA CBA CB A CBA CBA CBA CBA CBA.
channel0 | The 16 elements of the first channel to be interleaved |
channel1 | The 16 elements of the second channel to be interleaved |
channel2 | The 16 elements of the third channel to be interleaved |
interleavedA | Resulting first 16 of the interleaved data |
interleavedB | Resulting second 16 of the interleaved data |
interleavedC | Resulting third 16 of the interleaved data |
|
static |
Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
channel0 | The 16 elements of the first channel to be interleaved, must be valid |
channel1 | The 16 elements of the second channel to be interleaved, must be valid |
channel2 | The 16 elements of the third channel to be interleaved, must be valid |
interleaved | The resulting 48 interleaved elements, must be valid |
|
inlinestatic |
Interpolates 15 elements of 2x2 blocks for 1 channel 8 bit frames.
The interpolation is specified by tx and ty with range [0, 128u].
values0 | First row of 16 elements to be interpolated |
values1 | Second row of 16 elements to be interpolated |
fx_fy_fxfy_ | In each unsigned 16 bit element: ((128u - tx) * (128u - ty)) | (tx * (128u - ty)) << 16 |
fx_fyfxfy | In each unsigned 16 bit element: (128u - tx) * ty | (tx * ty) << 16 |
|
inlinestatic |
Interpolates 8 elements of 2x2 blocks for 1 channel 8 bit frames.
The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
...
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].
values0 | First row of 9 elements to be interpolated |
values1 | Second row of 9 elements to be interpolated |
fx_fy_ | In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty) |
fxfy_ | In each unsigned 16 bit element: Product of (tx) and (128u - ty) |
fx_fy | In each unsigned 16 bit element: Product of (128u - tx) and (ty) |
fxfy | In each unsigned 16 bit element: Product of (tx) and (ty) |
|
inlinestaticprivate |
Returns the interpolated pixel values for one 2 channel 16 bit pixel.
pixel | Upper left pixel in the frame |
size | Size of one frame row in bytes |
fx_y_ | Product of the inverse fx and the inverse fy interpolation factor |
fxy_ | Product of the fx and the inverse fy interpolation factor |
fx_y | Product of the inverse fx and the fy interpolation factor |
fxy | Product of the fx and the fy interpolation factor |
|
inlinestatic |
Interpolates 8 elements of 2x2 blocks for 2 channel 16 bit frames.
The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
...
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].
values0 | First row of 10 elements to be interpolated |
values1 | Second row of 10 elements to be interpolated |
fx_fy_ | In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty) |
fxfy_ | In each unsigned 16 bit element: Product of (tx) and (128u - ty) |
fx_fy | In each unsigned 16 bit element: Product of (128u - tx) and (ty) |
fxfy | In each unsigned 16 bit element: Product of (tx) and (ty) |
|
inlinestatic |
Interpolates 12 elements of 2x2 blocks for 3 channel 24 bit frames.
The interpolation is specified by tx and ty with range [0, 128u].
values0 | First row of 15 elements to be interpolated |
values1 | Second row of 15 elements to be interpolated |
fx_fy_fxfy_ | In each unsigned 16 bit element: ((128u - tx) * (128u - ty)) | (tx * (128u - ty)) << 16 |
fx_fyfxfy | In each unsigned 16 bit element: (128u - tx) * ty | (tx * ty) << 16 |
|
inlinestatic |
Interpolates 8 elements of 2x2 blocks for 3 channel 24 bit frames.
The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
...
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].
values0 | First row of 11 elements to be interpolated |
values1 | Second row of 11 elements to be interpolated |
fx_fy_ | In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty) |
fxfy_ | In each unsigned 16 bit element: Product of (tx) and (128u - ty) |
fx_fy | In each unsigned 16 bit element: Product of (128u - tx) and (ty) |
fxfy | In each unsigned 16 bit element: Product of (tx) and (ty) |
|
inlinestatic |
Interpolates 2x4 elements (two separated blocks of 4 elements) of 2x2 blocks for 4 channel 32 bit frames.
The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
...
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].
values0 | First row of 16 elements to be interpolated |
values1 | Second row of 16 elements to be interpolated |
fx_fy_ | In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty) |
fxfy_ | In each unsigned 16 bit element: Product of (tx) and (128u - ty) |
fx_fy | In each unsigned 16 bit element: Product of (128u - tx) and (ty) |
fxfy | In each unsigned 16 bit element: Product of (tx) and (ty) |
|
inlinestatic |
Interpolates 8 elements of 2x2 blocks for 4 channel 32 bit frames.
The first interpolation element results from the first and second element of both rows.
The second interpolation element results from the second and third element of both rows.
...
The eighth interpolation element results from the eighth and ninth.
The interpolation is specified by tx and ty with range [0, 128u].
values0 | First row of 12 elements to be interpolated |
values1 | Second row of 12 elements to be interpolated |
fx_fy_ | In each unsigned 16 bit element: Product of (128u - tx) and (128u - ty) |
fxfy_ | In each unsigned 16 bit element: Product of (tx) and (128u - ty) |
fx_fy | In each unsigned 16 bit element: Product of (128u - tx) and (ty) |
fxfy | In each unsigned 16 bit element: Product of (tx) and (ty) |
|
inlinestatic |
Loads a 128i value from the memory.
buffer | Buffer to be loaded (does not need to be aligned on any particular boundary), ensure that the buffer has a size of at least 16 bytes |
|
inlinestatic |
Loads the lower 64 bit of a 128i value from the memory.
The upper 64 bit are zeroed.
buffer | Buffer to be loaded (does not need to be aligned on any particular boundary), ensure that the buffer has a size of at least 8 bytes |
|
inlinestatic |
Loads 10 bytes from memory, which holds either at least 16 bytes or exactly 10 bytes, to a 128i value and sets the remaining bytes of the resulting 128i value to zero.
The loaded memory will be stored in the upper 10 bytes of the 128i value while the lowest remaining 6 bytes will be set to zero. Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [09 08 07 06 05 04 03 02 01 00 ZZ ZZ ZZ ZZ ZZ ZZ], with ZZ meaning zero.
buffer | Buffer to be loaded (does not need to be aligned on any particular boundary) |
tBufferHas16Bytes | True, if the buffer holds at least 16 bytes; False, if the buffer holds only 10 bytes |
|
inlinestatic |
|
inlinestatic |
Loads 13 bytes from memory, which holds either at least 16 bytes or exactly 13 bytes, to a 128i value while the remaining bytes of the resulting 128i value will be random.
The loaded memory will be stored in the lower 13 bytes of the 128i value while the highest remaining 3 bytes will be random.
Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [?? ?? ?? 12 11 10 09 08 07 06 05 04 03 02 01 00], with ?? meaning a random value.
buffer | Buffer to be loaded (does not need to be aligned on any particular boundary) |
tBufferHas16Bytes | True, if the buffer holds at least 16 bytes; False, if the buffer holds only 13 bytes |
|
inlinestatic |
|
inlinestatic |
Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value while the remaining byte of the resulting 128i value will be random.
The loaded memory will be stored in the lower 15 bytes of the 128i value while the highest remaining 1 byte will be random.
Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [?? 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00], with ?? meaning a random value.
buffer | Buffer to be loaded (does not need to be aligned on any particular boundary) |
tBufferHas16Bytes | True, if the buffer holds at least 16 bytes; False, if the buffer holds only 15 bytes |
|
inlinestatic |
|
inlinestatic |
Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value and sets the remaining byte of the resulting 128i value to zero.
The loaded memory will be stored in the lower 15 bytes of the 128i value while the highest remaining 1 byte will be set to zero.
Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [ZZ 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00], with ZZ meaning zero.
buffer | Buffer to be loaded (does not need to be aligned on any particular boundary) |
tBufferHas16Bytes | True, if the buffer holds at least 16 bytes; False, if the buffer holds only 15 bytes |
|
inlinestatic |
|
inlinestatic |
Loads 15 bytes from memory, which holds either at least 16 bytes or exactly 15 bytes, to a 128i value and sets the remaining byte of the resulting 128i value to zero.
The loaded memory will be stored in the upper 15 bytes of the 128i value while the lowest remaining 1 byte will be set to zero. Thus, the resulting 128 bit value has the following byte pattern (high bits left, low bits right): [14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 ZZ], with ZZ meaning zero.
buffer | Buffer to be loaded (does not need to be aligned on any particular boundary) |
tBufferHas16Bytes | True, if the buffer holds at least 16 bytes; False, if the buffer holds only 15 bytes |
|
inlinestatic |
|
inlinestatic |
Loads 16 bytes from memory which is at least 16 bytes large and shifts the 128i value by a specified number of bytes to the right (by inserting zeros).
This function can be used if the remaining buffer is smaller than 16 bytes while the buffer exceeds/continues in the lower address space (from the original point of interest).
Thus, this function can handle a buffer with the following pattern (with lower address left and high address right):
| ?? ?? ?? ?? ?? ?? ?? ?? ?? V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 |, where ?? represent random values in our buffer (in the lower address space), and VX represent the values of interest and V0 the location to which 'buffer' is pointing.
The ten values of interest can be loaded by: load_u8_16_and_shift_right<6>(buffer - 6);
The resulting 128i register will then be composed of (high bits left, low bits right): [00 00 00 00 00 00 V9 V8 V7 V6 V5 V4 V3 V2 V1 V0].
buffer | The actual address from which the 16 bytes will be loaded, must be valid and must be at least 16 bytes large |
tShiftBytes | The number of bytes which will be shifted (to the right) after the memory has been loaded, with range [0, 16] |
|
inlinestatic |
Moves the higher 8 bits of eight 16 bit elements to the lower 8 bits and fills the high bits with 0.
Given: PONM-LKJI-HGFE-DCBA
Result: 0P0N-0L0J-0H0F-0D0B
value | Value to remove the high bits for |
|
inlinestatic |
Moves the higher 8 bits of five 16 bit elements to the lower 8 bits and fills the high bits with 0.
Given: PONM-LKJI-HGFE-DCBA
Result: 0000-000J-0H0F-0D0B
value | Value to remove the high bits for |
|
inlinestatic |
Moves the higher 8 bits of six 16 bit elements to the lower 8 bits and fills the high bits with 0.
Given: PONM-LKJI-HGFE-DCBA
Result: 0000-0L0J-0H0F-0D0B
value | Value to remove the high bits for |
|
inlinestatic |
Moves the higher 8 bits of seven 16 bit elements to the lower 8 bits and fills the high bits with 0.
Given: PONM-LKJI-HGFE-DCBA
Result: 000N-0L0J-0H0F-0D0B
value | Value to remove the high bits for |
|
inlinestatic |
Moves the higher 16 bits of four 32 bit elements to the lower 16 bits and fills the high bits with 0.
Given: PONM-LKJI-HGFE-DCBA
Result: 00PO-00LK-00HG-00DC
value | Value to remove the high bits for |
|
inlinestatic |
Moves the lower 8 bits of eight 16 bit elements to the higher 64 bits and fills the low 64 bits with 0.
Given: PONM-LKJI-HGFE-DCBA
Result: OMKI-GECA-0000-0000
value | Value to remove the high bits for |
|
inlinestatic |
Moves the lower 8 bits of eight 16 bit elements to the lower 64 bits and fills the high 64 bits with 0.
Given: PONM-LKJI-HGFE-DCBA
Result: 0000-0000-OMKI-GECA
value | Value to remove the high bits for |
|
inlinestatic |
Moves the lower 16 bits of four 32 bit elements to the lower 64 bits and fills the high 64 bits with 0.
Given: PONM-LKJI-HGFE-DCBA
Result: 0000-0000-NMJI-FEBA
value | Value to remove the high bits for |
|
inlinestatic |
Moves the lower 8 bits of four 32 bit elements to the lower 32 bits and fills the high 96 bits with 0.
Given: PONM-LKJI-HGFE-DCBA
Result: 0000-0000-0000-MIEA
value | Value to remove the high bits for |
|
static |
Multiplies 8 int16_t values with 8 int16_t values and returns the products as 8 int32_t results.
The pseudo code of the function is as follows:
products0[0] = values0[0] * values1[0] ... products0[3] = values0[3] * values1[3] products1[0] = values0[4] * values1[4] ... products1[3] = values0[7] * values1[7]
values0 | The first 8 int16_t values to be multiplied |
values1 | The second 8 int16_t values to be multiplied |
products0 | The resulting first 4 int32_t products |
products1 | The resulting second 4 int32_t products |
|
static |
Multiplies 8 int16_t values with 8 int16_t values and adds the products to 8 int32_t values.
The pseudo code of the function is as follows:
results0[0] += values0[0] * values1[0] ... results0[3] += values0[3] * values1[3] results1[0] += values0[4] * values1[4] ... results1[3] += values0[7] * values1[7]
values0 | The first 8 int16_t values to be multiplied |
values1 | The second 8 int16_t values to be multiplied |
results0 | The results to which the first 4 int32_t products will be added |
results1 | The results to which the second 4 int32_t products will be added |
|
inlinestatic |
Prefetches a block of non-temporal memory into non-temporal cache structure.
data | Data to be prefetched |
|
inlinestatic |
Prefetches a block of temporal memory into all cache levels.
data | Data to be prefetched |
|
inlinestatic |
Prefetches a block of temporal memory in all cache levels except 0th cache level.
data | Data to be prefetched |
|
inlinestatic |
Prefetches a block of temporal memory in all cache levels, except 0th and 1st cache levels.
data | Data to be prefetched |
|
inlinestatic |
Removes the higher 8 bits of eight 16 bit elements.
Given: PONM-LKJI-HGFE-DCBA
Result: 0O0M-0K0I-0G0E-0C0A
value | Value to remove the high bits for |
|
inlinestatic |
Removes the higher 8 bits of eight 16 bit elements and sets the upper two bytes to zero.
Given: PONM-LKJI-HGFE-DCBA
Result: 000M-0K0I-0G0E-0C0A
value | Value to remove the high bits for |
|
inlinestatic |
Removes the higher 8 bits of eight 16 bit elements and sets the lower two bytes to zero.
Given: PONM-LKJI-HGFE-DCBA
Result: 0O0M-0K0I-0G0E-0C00
value | Value to remove the high bits for |
|
inlinestatic |
Removes the higher 16 bits of four 32 bit elements.
Given: PONM-LKJI-HGFE-DCBA
Result: 00NM-00JI-00FE-00BA
value | Value to remove the high bits for |
|
inlinestatic |
Removes the lower 16 bits of four 32 bit elements.
Given: PONM-LKJI-HGFE-DCBA
Result: PO00-LK00-HG00-DC00
value | Value to remove the lower bits for |
|
static |
Reverses the order of the channels of 16 pixels (32 elements) of an image with 2 interleaved channels and 8 bit per element (e.g., YA16 to AY16).
interleaved | 32 elements of an image with 2 channels and 8 bit per element (32 bytes) |
reversedInterleaved | Resulting 32 elements with reversed channel order |
|
static |
Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channels and 8 bit per element.
interleaved0 | First 16 elements holding the interleaved image data |
interleaved1 | Second 16 elements holding the interleaved image data |
interleaved2 | Third 16 elements holding the interleaved image data |
reversedInterleaved0 | Resulting first 16 elements holding the interleaved image data with reversed channel order |
reversedInterleaved1 | Resulting second 16 elements holding the interleaved image data with reversed channel order |
reversedInterleaved2 | Resulting third 16 elements holding the interleaved image data with reversed channel order |
|
static |
Reverses the order of the first and last channel of 48 elements (16 pixels) of an image with 3 interleaved channels and 8 bit per element (e.g., RGB24 to BGR24).
interleaved | 48 elements of an image with 3 channels and 8 bit per element (48 bytes) |
reversedInterleaved | Resulting 48 elements with reversed channel order |
|
inlinestatic |
Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channels and 8 bit per element (in place).
interleaved | 48 elements of an image with 3 channels and 8 bit per element (48 bytes) |
|
static |
Reverses the order of the channels of 16 pixels (64 elements) of an image with 4 interleaved channels and 8 bit per element (e.g., RGBA32 to ABGR32).
interleaved | 64 elements of an image with 4 channels and 8 bit per element (64 bytes) |
reversedInterleaved | Resulting 64 elements with reversed channel order |
|
inlinestatic |
Reverses the order of 48 elements with 8 bit per element.
elements0 | First 16 elements |
elements1 | Second 16 elements |
elements2 | Third 16 elements |
reversedElements0 | Resulting reversed first 16 elements |
reversedElements1 | Resulting reversed second 16 elements |
reversedElements2 | Resulting reversed third 16 elements |
|
inlinestatic |
Reverses the order of 48 elements with 8 bit per element.
elements | 48 elements that will be reversed |
reversedElements | Resulting reversed 48 elements |
|
inlinestatic |
Reverses the order of 48 elements with 8 bit per element (in place).
elements | 48 elements that will be reversed |
|
inlinestatic |
Sets a 128i value by two 64 bit values.
high64 | High 64 bits to be set |
low64 | Low 64 bits to be set |
|
inlinestatic |
Shifts the channels of 4 channel 32 bit pixels to the back, moves the back channel to the front channel, and mirrors the four individual pixels.
elements | 16 elements of 4 pixels to be shifted and mirrored |
shiftedElements | Resulting shifted and mirrored elements |
|
inlinestatic |
Shifts the channels of 4 channel 32 bit pixels to the front, moves the front channel to the back channel, and mirrors the four individual pixels.
elements | 16 elements of 4 pixels to be shifted and mirrored |
shiftedElements | Resulting shifted and mirrored elements |
|
inlinestatic |
Shifts the channels of 4 channel 32 bit pixels to the back and moves the back channel to the front channel.
The function takes four pixels DCBA DCBA DCBA DCBA and provides CBAD CBAD CBAD CBAD.
elements | 16 elements of 4 pixels to be shifted |
shiftedElements | Resulting shifted elements |
|
inlinestatic |
Shifts the channels of 4 channel 32 bit pixels to the front and moves the front channel to the back channel.
The function takes four pixels DCBA DCBA DCBA DCBA and provides ADCB ADCB ADCB ADCB.
elements | 16 elements of 4 pixels to be shifted |
shiftedElements | Resulting shifted elements |
|
inlinestatic |
Shuffles the lower four 8 bits to the low 8 bits of four 32 bit elements.
Given: PONM-LKJI-HGFE-DCBA
Result: 000D-000C-000B-000A
value | Value to be shuffled |
|
inlinestatic |
Shuffles pairs of two neighbors of the high 64 bits to the low 8 bits of eight 16 bit elements.
value | Value to be shuffled |
|
inlinestatic |
Shuffles pairs of two neighbors of the low 64 bits to the low 8 bits of eight 16 bit elements.
value | Value to be shuffled |
|
inlinestatic |
Shuffles pairs of four neighbors of the high 64 bits to the low 8 bits of eight 16 bit elements.
Given: PONM-LKJI-HGFE-DCBA
Result: 0P0L-0O0K-0N0J-0M0I
value | Value to be shuffled |
|
inlinestatic |
Shuffles pairs of four neighbors of the low 64 bits to the low 8 bits of eight 16 bit elements.
Given: PONM-LKJI-HGFE-DCBA
Result: 0H0D-0G0C-0F0B-0E0A
value | Value to be shuffled |
|
inlinestatic |
Returns the interpolated sum of square difference for one 2 channel 16 bit pixel.
pixel0 | Upper left pixel in the first frame |
pixel1 | Upper left pixel in the second frame |
size0 | Size of one frame row in bytes |
size1 | Size of one frame row in bytes |
f0x_y_ | Product of the inverse fx and the inverse fy interpolation factor for the first image |
f0xy_ | Product of the fx and the inverse fy interpolation factor for the first image |
f0x_y | Product of the inverse fx and the fy interpolation factor for the first image |
f0xy | Product of the fx and the fy interpolation factor for the first image |
f1x_y_ | Product of the inverse fx and the inverse fy interpolation factor for the second image |
f1xy_ | Product of the fx and the inverse fy interpolation factor for the second image |
f1x_y | Product of the inverse fx and the fy interpolation factor for the second image |
f1xy | Product of the fx and the fy interpolation factor for the second image |
|
inlinestatic |
Returns the interpolated sum of square difference for one 2 channel 16 bit pixel.
pixel0 | Upper left pixel in the first frame |
pixel1 | Upper left pixel in the second frame |
size0 | Size of one frame row in bytes |
size1 | Size of one frame row in bytes |
f1x_y_ | Product of the inverse fx and the inverse fy interpolation factor for the second image |
f1xy_ | Product of the fx and the inverse fy interpolation factor for the second image |
f1x_y | Product of the inverse fx and the fy interpolation factor for the second image |
f1xy | Product of the fx and the fy interpolation factor for the second image |
|
inlinestatic |
Stores a 128i value to the memory.
value | Value to be stored |
buffer | Buffer receiving the value (does not need to be aligned on any particular boundary) |
|
inlinestatic |
Sums 16 elements with 8 bit per element.
The results are stored as first 32 bit integer value (high bits left, low bits right): ???? ???? ???? 0000.
elements | 16 elements holding the image data |
|
inlinestatic |
Sums 16 elements with 8 bit per element.
The results are stored as first 32 bit integer value (high bits left, low bits right): ???? ???? ???? 0000.
elements | 16 elements holding the image data |
|
inlinestatic |
Sums the last 15 elements of a 16 elements buffer with 8 bit per element, the beginning 1 element is interpreted as zero.
However, the provided buffer must be at least 16 bytes large as the entire 16 bytes will be loaded to the SSE register.
Thus, this function handles one buffer with this pattern (while the memory starts left and ends right): [NA 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15]. The results are stored as first 32 bit integer value (high bits left, low bits right): ???? ???? ???? 0000.
elements | (1+) 15 elements holding the image data |
|
inlinestatic |
Sums the first 15 elements of a buffer with 8 bit per element.
This function supports loading the 15 elements either from a buffer with only 15 bytes or from a buffer with at least 16 bytes.
If the provided buffer holds at least 16 bytes, the load is much faster than in the case where the buffer holds only 15 bytes.
The results are stored as first 32 bit integer value (high bits left, low bits right): ???? ???? ???? 0000.
elements | 16 elements holding the image data |
tBufferHas16Bytes | True, if the buffer holds at least 16 bytes; False, if the buffer holds only 15 bytes |
|
static |
Adds the four (all four) individual 32 bit float of a m128 value and returns the result.
value | The value which elements will be added |
|
static |
Adds the two (all two) individual 64 bit float of a m128 value and returns the result.
value | The value which elements will be added |
|
static |
Adds the four (all four) individual 32 bit unsigned integer values of a m128i value and returns the result.
value | The value which elements will be added |
|
inlinestatic |
Adds the first two individual 32 bit unsigned integer values of a m128i value and returns the result.
value | The value which elements will be added |
|
inlinestatic |
Adds the first and the second 32 bit unsigned integer values of a m128i value and returns the result.
value | The value which elements will be added |
|
inlinestatic |
Sum absolute differences determination for 16 elements of a 16 elements buffer with 8 bit precision.
image0 | First 16 elements to determine the sad for, may be non aligned |
image1 | Second 16 elements to determine the sad for, may be non aligned |
|
inlinestatic |
Sum absolute differences determination for the last 11 elements of a 16 elements buffer with 8 bit precision.
image0 | First 11 elements to determine the sad for, may be non aligned |
image1 | Second 11 elements to determine the sad for, may be non aligned |
|
inlinestatic |
Sum absolute differences determination for the first 10 elements of a buffer with 8 bit precision.
This function supports loading the 10 elements either from a buffer with only 10 bytes or from a buffer with at least 16 bytes.
image0 | First 10 elements to determine the sad for, may be non aligned |
image1 | Second 10 elements to determine the sad for, may be non aligned |
tBufferHas16Bytes | True, if the buffer holds at least 16 bytes; False, if the buffer holds 10 bytes only |
|
inlinestatic |
Sum absolute differences determination for the first 15 elements of a buffer with 8 bit precision.
This function supports loading the 15 elements either from a buffer with only 15 bytes or from a buffer with at least 16 bytes.
image0 | First 15 elements to determine the sad for, may be non aligned |
image1 | Second 15 elements to determine the sad for, may be non aligned |
tBufferHas16Bytes | True, if the buffer holds at least 16 bytes; False, if the buffer holds 15 bytes only |
|
inlinestatic |
Sums 15 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element.
The results are stored in three 32 bit integer values (high bits left, low bits right): ???? 2222 1111 0000.
interleaved | 45 elements holding the interleaved image data |
|
inlinestatic |
Sums 16 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element.
The results are stored in three 32 bit integer values (high bits left, low bits right): ???? 2222 1111 0000.
interleaved0 | First 16 elements holding the interleaved image data |
interleaved1 | Second 16 elements holding the interleaved image data |
interleaved2 | Third 16 elements holding the interleaved image data |
|
inlinestatic |
Sums 16 elements individually for an interleaved pixel format with 3 channels and 8 bit per channel and element.
The results are stored in three 32 bit integer values (high bits left, low bits right): ???? 2222 1111 0000.
interleaved | 48 elements holding the interleaved image data |
|
inlinestatic |
Sum square difference determination for 16 elements with 8 bit precision.
row0 | First 16 elements to determine the ssd for |
row1 | Second 16 elements to determine the ssd for |
|
inlinestatic |
Sum square difference determination for 16 elements with 8 bit precision.
image0 | First 16 elements to determine the ssd for, may be non aligned |
image1 | Second 16 elements to determine the ssd for, may be non aligned |
|
inlinestatic |
Sum square difference determination for 16 elements with 8 bit precision.
image0 | First 16 elements to determine the ssd for, may be non aligned |
image1 | Second 16 elements to determine the ssd for, may be non aligned |
|
inlinestatic |
Sum square difference determination for the last 12 elements of a 16 elements buffer with 8 bit precision, the beginning 4 elements are interpreted as zero.
However, the provided buffers must be at least 16 bytes large as the entire 16 bytes will be loaded to the SSE registers.
Thus, this function handles two buffers with this pattern (while the memory starts left and ends right): [NA NA NA NA 04 05 06 07 08 09 10 11 12 13 14 15].
image0 | First (4+) 12 elements to determine the ssd for, with any alignment |
image1 | Second (4+) 12 elements to determine the ssd for, with any alignment |
|
inlinestatic |
Sum square difference determination for the last 13 elements of a 16 elements buffer with 8 bit precision, the beginning 3 elements are interpreted as zero.
However, the provided buffers must be at least 16 bytes large as the entire 16 bytes will be loaded to the SSE registers.
Thus, this function handles two buffers with this pattern (while the memory starts left and ends right): [NA NA NA 03 04 05 06 07 08 09 10 11 12 13 14 15].
image0 | First (3+) 13 elements to determine the ssd for, may be non aligned |
image1 | Second (3+) 13 elements to determine the ssd for, may be non aligned |
|
inlinestatic |
Sum square difference determination for the first 12 elements of a 16 elements buffer with 8 bit precision, the remaining 4 elements are set to zero.
However, the provided buffers must be at least 16 bytes large as the entire 16 bytes will be loaded to the SSE registers.
Thus, this function handles two buffers with this pattern (while the memory starts left and ends right): [00 01 02 03 04 05 06 07 08 09 10 11 NA NA NA NA].
image0 | First 12 (+4) elements to determine the ssd for, with any alignment |
image1 | Second 12 (+4) elements to determine the ssd for, with any alignment |
|
inlinestatic |
Sum square difference determination for the first 13 elements of a buffer with 8 bit precision.
This function supports loading the 13 elements either from a buffer with only 13 bytes or from a buffer with at least 16 bytes.
image0 | First 13 elements to determine the ssd for, may be non aligned |
image1 | Second 13 elements to determine the ssd for, may be non aligned |
tBufferHas16Bytes | True, if the buffer holds at least 16 bytes; False, if the buffer holds 13 bytes only |
|
inlinestatic |
Sum square difference determination for the first 15 elements of a buffer with 8 bit precision.
This function supports loading the 15 elements either from a buffer with only 15 bytes or from a buffer with at least 16 bytes.
image0 | First 15 elements to determine the ssd for, may be non aligned |
image1 | Second 15 elements to determine the ssd for, may be non aligned |
tBufferHas16Bytes | True, if the buffer holds at least 16 bytes; False, if the buffer holds 15 bytes only |
|
inlinestatic |
Sum square differences determination for the last 11 elements of a 16 elements buffer with 8 bit precision.
image0 | First 11 elements to determine the ssd for, may be non aligned |
image1 | Second 11 elements to determine the ssd for, may be non aligned |
|
inlinestatic |
Reverses the order of the first and last channel of two sets of 48 elements of an image with 3 interleaved channels and 8 bit per element and further swaps both sets.
first | First 48 elements of an image with 3 channels and 8 bit per element (48 bytes) |
second | Second 48 elements of an image with 3 channels and 8 bit per element (48 bytes) |
|
inlinestatic |
Reverses the order of two sets of 48 elements with 8 bit per element and further swaps both sets.
first | First 48 elements that will be reversed and swapped with the second 48 elements |
second | Second 48 elements that will be reversed and swapped with the first 48 elements |
|
inlinestatic |
Returns one specific 16 bit unsigned integer value of a m128i value object.
value | The value from which the 16 bit value will be returned |
tIndex | The index of the requested 16 bit integer value, with range [0, 7] |
|
inlinestatic |
Returns one specific 32 bit unsigned integer value of a m128i value object.
value | The value from which the 32 bit value will be returned |
tIndex | The index of the requested 32 bit integer value, with range [0, 3] |
|
inlinestatic |
Returns one specific 8 bit unsigned integer value of a m128i value object.
value | The value from which the 8 bit value will be returned |
tIndex | The index of the requested 8 bit integer value, with range [0, 15] |
|
inlinestatic |
Returns one specific 8 bit unsigned integer value of a m128i value object.
value | The value from which the 8 bit value will be returned |
index | The index of the requested 8 bit integer value, with range [0, 15] |