This class implements computer vision functions using NEON extensions. More...

#include <NEON.h>

Static Public Member Functions
static void	prefetchT0 (const void *const data)
	Prefetches a block of temporal memory into all cache levels.

static void	prefetchT1 (const void *const data)
	Prefetches a block of temporal memory in all cache levels except 0th cache level.

static void	prefetchT2 (const void *const data)
	Prefetches a block of temporal memory in all cache levels, except 0th and 1st cache levels.

static void	prefetchNTA (const void *const data)
	Prefetches a block of non-temporal memory into non-temporal cache structure.

static uint32x4_t	sumSquareDifferences8BitBack9Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square differences determination for the last 9 elements of a 16 element buffer with 8 bit precision.

static uint32x4_t	sumSquareDifferences8BitBack10Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square differences determination for the last 10 elements of a 16 element buffer with 8 bit precision.

static uint32x4_t	sumSquareDifferences8BitBack11Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square differences determination for the last 11 elements of a 16 element buffer with 8 bit precision.

static uint32x4_t	sumSquareDifferences8BitBack12Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square differences determination for the last 12 elements of a 16 element buffer with 8 bit precision.

static uint32x4_t	sumSquareDifferences8BitBack13Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square differences determination for the last 13 elements of a 16 element buffer with 8 bit precision.

static uint32x4_t	sumSquareDifferences8BitBack14Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square differences determination for the last 14 elements of a 16 element buffer with 8 bit precision.

static uint32x4_t	sumSquareDifferences8BitBack15Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square differences determination for the last 15 elements of a 16 element buffer with 8 bit precision.

static uint32x4_t	sumSquareDifference8BitFront9Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for the first 9 elements of a 16 element buffer with 8 bit precision.

static uint32x4_t	sumSquareDifference8BitFront10Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for the first 10 elements of a 16 element buffer with 8 bit precision.

static uint32x4_t	sumSquareDifference8BitFront11Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for the first 11 elements of a 16 element buffer with 8 bit precision.

static uint32x4_t	sumSquareDifference8BitFront12Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for the first 12 elements of a 16 element buffer with 8 bit precision.

static uint32x4_t	sumSquareDifference8BitFront13Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for the first 13 elements of a 16 element buffer with 8 bit precision.

static uint32x4_t	sumSquareDifference8BitFront14Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for the first 14 elements of a 16 element buffer with 8 bit precision.

static uint32x4_t	sumSquareDifference8BitFront15Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for the first 15 elements of a 16 element buffer with 8 bit precision.

static uint32x4_t	sumSquareDifference8Bit16Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for 16 elements with 8 bit precision.

static uint32x4_t	sumSquareDifference8Bit16Elements (const uint8x16_t &row0, const uint8x16_t &row1)
	Sum square difference determination for 16 elements with 8 bit precision.

static OCEAN_FORCE_INLINE void	average16Elements1Channel8Bit2x2 (const uint8_t *const row0, const uint8_t *const row1, uint8_t *const result)
	Averages 16 elements of 2x2 blocks for 1 channel 8 bit frames.

static OCEAN_FORCE_INLINE void	average32Elements1Channel8Bit2x2 (const uint8_t *const row0, const uint8_t *const row1, uint8_t *const result)
	Averages 32 elements of 2x2 blocks for 1 channel 8 bit frames.

static void	average16ElementsBinary1Channel8Bit2x2 (const uint8_t *const image0, const uint8_t *const image1, uint8_t *const result, const uint8_t threshold=192u)
	Averages 16 elements of 2x2 blocks for 1 channel binary (0x00 or 0xFF) frames.

static OCEAN_FORCE_INLINE void	average32Elements2Channel16Bit2x2 (const uint8_t *const row0, const uint8_t *const row1, uint8_t *const result)
	Averages 32 elements of 2x2 blocks for 2 channel 16 bit frames.

static OCEAN_FORCE_INLINE void	average64Elements2Channel16Bit2x2 (const uint8_t *const row0, const uint8_t *const row1, uint8_t *const result)
	Averages 64 elements of 2x2 blocks for 2 channel 16 bit frames.

static OCEAN_FORCE_INLINE void	average48Elements3Channel24Bit2x2 (const uint8_t *const row0, const uint8_t *const row1, uint8_t *const result)
	Averages 48 elements of 2x2 blocks for 3 channel 24 bit frames.

static OCEAN_FORCE_INLINE void	average64Elements4Channel32Bit2x2 (const uint8_t *const row0, const uint8_t *const row1, uint8_t *const result)
	Averages 64 elements of 2x2 blocks for 4 channel 32 bit frames.

static void	average24Elements1Channel8Bit3x3 (const uint8_t *const image0, const uint8_t *const image1, const uint8_t *const image2, uint8_t *const result)
	Averages 24 elements of 3x3 blocks for 1 channel 8 bit frames.

static void	average48Elements1Channel8Bit3x3Approximation (const uint8_t *const image0, const uint8_t *const image1, const uint8_t *const image2, uint8_t *const result)
	Averages 48 elements of 3x3 blocks for 1 channel 8 bit frames.

static void	gradientHorizontalVertical8Elements1Channel8Bit (const uint8_t *source, int8_t *response, const unsigned int width)
	Determines the horizontal and the vertical gradients for 8 following pixels for a given 1 channel 8 bit frame.

static void	gradientHorizontalVertical8Elements3Products1Channel8Bit (const uint8_t *source, int16_t *response, const unsigned int width)
	Determines the squared horizontal and vertical gradients and the product of both gradients for 8 following pixels for a given 1 channel 8 bit frame.

static uint32x4_t	sumSquareDifference8Bit8Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum square difference determination for 8 elements with 8 bit precision.

static uint32x4_t	sumSquareDifference8Bit8Elements (const uint8x8_t &row0, const uint8x8_t &row1)
	Sum square difference determination for 8 elements with 8 bit precision.

static uint32x4_t	sumAbsoluteDifference8Bit16Elements (const uint8_t *const image0, const uint8_t *const image1)
	Sum absolute difference determination for 16 elements with 8 bit precision.

static uint32x4_t	sumAbsoluteDifference8Bit16Elements (const uint8x16_t &row0, const uint8x16_t &row1)
	Sum absolute difference determination for 16 elements with 8 bit precision.

static OCEAN_FORCE_INLINE unsigned int	sum32x4ByLanes (const uint32x4_t &value)
	Sums the four 32 bit values and returns the result.

static OCEAN_FORCE_INLINE uint32x4_t	removeHighBits32_16 (const uint32x4_t &value)
	Removes (sets to zero) the high 16 bits of four 32 bit elements.

static OCEAN_FORCE_INLINE uint16x4_t	removeHighBits16_8 (const uint16x4_t &value)
	Removes (sets to zero) the high 8 bits of four 16 bit elements.

static OCEAN_FORCE_INLINE uint16x8_t	removeHighBits16_8 (const uint16x8_t &value)
	Removes (sets to zero) the high 8 bits of eight 16 bit elements.

static OCEAN_FORCE_INLINE uint32x4_t	moveHighBits32_16 (const uint32x4_t &value)
	Moves the high 16 bits of four 32 bit elements to the low 16 bits and fill the high bits with 0.

static OCEAN_FORCE_INLINE uint16x4_t	moveHighBits16_8 (const uint16x4_t &value)
	Moves the high 8 bits of four 16 bit elements to the low 8 bits and fill the high bits with 0.

static OCEAN_FORCE_INLINE uint16x8_t	moveHighBits16_8 (const uint16x8_t &value)
	Moves the high 8 bits of eight 16 bit elements to the low 8 bits and fill the high bits with 0.

static OCEAN_FORCE_INLINE uint16x8_t	combineLowBits32x4to16x8 (const uint32x4_t &low, const uint32x4_t &high)
	Combines eight 32 bit values (holding 16 bit information) to eight 16 bit values.

static OCEAN_FORCE_INLINE uint8x16_t	combineLowBits16x8to8x16 (const uint16x8_t &low, const uint16x8_t &high)
	Combines sixteen 16 bit values (holding 8 bit information) to sixteen 8 bit values.

static OCEAN_FORCE_INLINE int32x4_t	sum16Bit4Blocks3x3 (const short *const rowTop, const short *const rowCenter, const short *const rowBottom)
	Determines the four sums of four successive (overlapping) 3x3 blocks of signed 16 bit integer values.

static OCEAN_FORCE_INLINE uint64x2_t	multiply (const uint64x2_t &value_u_64x2, const uint32x2_t &value_u_32x2)
	Multiplies two uint64_t values with two uint32_t values and stores the results in two uint64_t values.

static OCEAN_FORCE_INLINE int32x4_t	copySign (const uint32x4_t &signReceiver, const int32x4_t &signProvider)
	Copies the sign of a given value to another one.

static OCEAN_FORCE_INLINE uint8x16_t	cast16ElementsNEON (const float32x4_t &sourceA_f_32x4, const float32x4_t &sourceB_f_32x4, const float32x4_t &sourceC_f_32x4, const float32x4_t &sourceD_f_32x4)
	Casts 16 float elements to 16 uint8_t elements.

static OCEAN_FORCE_INLINE uint8x16_t	cast16ElementsNEON (const float *const source)
	Casts 16 float elements to 16 uint8_t elements.

static OCEAN_FORCE_INLINE float32x4x4_t	cast16ElementsNEON (const uint8x16_t &source_u_8x16)
	Casts 16 uint8_t elements to 16 float elements.

static OCEAN_FORCE_INLINE float32x4x4_t	cast16ElementsNEON (const uint8_t *const source)
	Casts 16 uint8_t elements to 16 float elements.

Static Private Member Functions
static unsigned int	interpolation2Channel16Bit1x1 (const uint8_t *const pixel, const unsigned int size, const unsigned int fx_y_, const unsigned int fxy_, const unsigned int fx_y, const unsigned int fxy)
	Returns the interpolated pixel values for one 2 channel 16 bit pixel.

static unsigned int	ssd2Channel16Bit1x1 (const uint8_t *const pixel0, const uint8_t *const pixel1, const unsigned int size0, const unsigned int size1, const unsigned int f1x_y_, const unsigned int f1xy_, const unsigned int f1x_y, const unsigned int f1xy)
	Returns the interpolated sum of square difference for one 2 channel 16 bit pixel.

static unsigned int	ssd2Channel16Bit1x1 (const uint8_t *const pixel0, const uint8_t *const pixel1, const unsigned int size0, const unsigned int size1, const unsigned int f0x_y_, const unsigned int f0xy_, const unsigned int f0x_y, const unsigned int f0xy, const unsigned int f1x_y_, const unsigned int f1xy_, const unsigned int f1x_y, const unsigned int f1xy)
	Returns the interpolated sum of square difference for one 2 channel 16 bit pixel.

Detailed Description

This class implements computer vision functions using NEON extensions.

Member Function Documentation

◆ average16Elements1Channel8Bit2x2()

OCEAN_FORCE_INLINE void Ocean::CV::NEON::average16Elements1Channel8Bit2x2	(	const uint8_t *const	row0,
		const uint8_t *const	row1,
		uint8_t *const	result
	)

static

Averages 16 elements of 2x2 blocks for 1 channel 8 bit frames.

The function takes two rows of 16 elements and returns 8 average elements (8 averaged pixels, each with 1 channel).

Parameters

row0	First row of 16 elements (16 pixels), must be valid
row1	Second row of 16 elements (16 pixels), must be valid
result	Resulting 8 average elements (8 pixels), must be valid

◆ average16ElementsBinary1Channel8Bit2x2()

void Ocean::CV::NEON::average16ElementsBinary1Channel8Bit2x2	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		uint8_t *const	result,
		const uint8_t	threshold = `192u`
	)

inline static

Averages 16 elements of 2x2 blocks for 1 channel binary (0x00 or 0xFF) frames.

The function takes two rows of 16 elements and returns 8 average elements (8 averaged pixels, each with 1 channel).

Parameters

image0	First row of 16 elements
image1	Second row of 16 elements
threshold	Minimal threshold to result in a pixel with value 255
result	Resulting 8 average elements

◆ average24Elements1Channel8Bit3x3()

void Ocean::CV::NEON::average24Elements1Channel8Bit3x3	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		const uint8_t *const	image2,
		uint8_t *const	result
	)

inline static

Averages 24 elements of 3x3 blocks for 1 channel 8 bit frames.

The function takes three rows of 24 elements and returns 8 average elements (8 averaged pixels, each with 1 channel).

Parameters

image0	First row of 24 elements
image1	Second row of 24 elements
image2	Third row of 24 elements
result	Resulting 8 average elements

       | 1 2 1 |
1/16 * | 2 4 2 |
       | 1 2 1 |

◆ average32Elements1Channel8Bit2x2()

OCEAN_FORCE_INLINE void Ocean::CV::NEON::average32Elements1Channel8Bit2x2	(	const uint8_t *const	row0,
		const uint8_t *const	row1,
		uint8_t *const	result
	)

static

Averages 32 elements of 2x2 blocks for 1 channel 8 bit frames.

The function takes two rows of 32 elements and returns 16 average elements (16 averaged pixels, each with 1 channel).

Parameters

row0	First row of 32 elements (32 pixels), must be valid
row1	Second row of 32 elements (32 pixels), must be valid
result	Resulting 16 average elements (16 pixels), must be valid

◆ average32Elements2Channel16Bit2x2()

OCEAN_FORCE_INLINE void Ocean::CV::NEON::average32Elements2Channel16Bit2x2	(	const uint8_t *const	row0,
		const uint8_t *const	row1,
		uint8_t *const	result
	)

static

Averages 32 elements of 2x2 blocks for 2 channel 16 bit frames.

The function takes two rows of 32 elements and returns 16 average elements (8 averaged pixels, each with 2 channels).

Parameters

row0	First row of 32 elements (16 pixels), must be valid
row1	Second row of 32 elements (16 pixels), must be valid
result	Resulting 16 average elements (8 pixels), must be valid

◆ average48Elements1Channel8Bit3x3Approximation()

void Ocean::CV::NEON::average48Elements1Channel8Bit3x3Approximation	(	const uint8_t *const	image0,
		const uint8_t *const	image1,
		const uint8_t *const	image2,
		uint8_t *const	result
	)

inline static

Averages 48 elements of 3x3 blocks for 1 channel 8 bit frames.

The function takes three rows of 48 elements and returns 16 average elements (16 averaged pixels, each with 1 channel).
Beware: This function calculates an approximation only.

Parameters

image0	First row of 48 elements
image1	Second row of 48 elements
image2	Third row of 48 elements
result	Resulting 16 average elements

       | 1 2 1 |
1/16 * | 2 4 2 |
       | 1 2 1 |

◆ average48Elements3Channel24Bit2x2()

OCEAN_FORCE_INLINE void Ocean::CV::NEON::average48Elements3Channel24Bit2x2	(	const uint8_t *const	row0,
		const uint8_t *const	row1,
		uint8_t *const	result
	)

static

Averages 48 elements of 2x2 blocks for 3 channel 24 bit frames.

The function takes two rows of 48 elements and returns 24 average elements (8 averaged pixels, each with 3 channels).

Parameters

row0	First row of 48 elements (16 pixels), must be valid
row1	Second row of 48 elements (16 pixels), must be valid
result	Resulting 24 average elements (8 pixels), must be valid

◆ average64Elements2Channel16Bit2x2()

OCEAN_FORCE_INLINE void Ocean::CV::NEON::average64Elements2Channel16Bit2x2	(	const uint8_t *const	row0,
		const uint8_t *const	row1,
		uint8_t *const	result
	)

static

Averages 64 elements of 2x2 blocks for 2 channel 16 bit frames.

The function takes two rows of 64 elements and returns 32 average elements (16 averaged pixels, each with 2 channels).

Parameters

row0	First row of 64 elements (32 pixels), must be valid
row1	Second row of 64 elements (32 pixels), must be valid
result	Resulting 32 average elements (16 pixels), must be valid

◆ average64Elements4Channel32Bit2x2()

OCEAN_FORCE_INLINE void Ocean::CV::NEON::average64Elements4Channel32Bit2x2	(	const uint8_t *const	row0,
		const uint8_t *const	row1,
		uint8_t *const	result
	)

static

Averages 64 elements of 2x2 blocks for 4 channel 32 bit frames.

The function takes two rows of 64 elements and returns 32 average elements (8 averaged pixels, each with 4 channels).

Parameters

row0	First row of 64 elements (16 pixels), must be valid
row1	Second row of 64 elements (16 pixels), must be valid
result	Resulting 32 average elements (8 pixels), must be valid

◆ cast16ElementsNEON() [1/4]

OCEAN_FORCE_INLINE uint8x16_t Ocean::CV::NEON::cast16ElementsNEON ( const float *const source )

static

Casts 16 float elements to 16 uint8_t elements.

Parameters

source The 16 float elements, must be valid

Returns: The resulting 16 uint8_t elements

◆ cast16ElementsNEON() [2/4]

OCEAN_FORCE_INLINE uint8x16_t Ocean::CV::NEON::cast16ElementsNEON	(	const float32x4_t &	sourceA_f_32x4,
		const float32x4_t &	sourceB_f_32x4,
		const float32x4_t &	sourceC_f_32x4,
		const float32x4_t &	sourceD_f_32x4
	)

static

Casts 16 float elements to 16 uint8_t elements.

Parameters

sourceA_f_32x4	The first 4 float elements
sourceB_f_32x4	The second 4 float elements
sourceC_f_32x4	The third 4 float elements
sourceD_f_32x4	The fourth 4 float elements

Returns: The resulting 16 uint8_t elements

◆ cast16ElementsNEON() [3/4]

OCEAN_FORCE_INLINE float32x4x4_t Ocean::CV::NEON::cast16ElementsNEON ( const uint8_t *const source )

static

Casts 16 uint8_t elements to 16 float elements.

Parameters

source The 16 uint8_t elements, must be valid

Returns: The resulting 16 float elements

◆ cast16ElementsNEON() [4/4]

OCEAN_FORCE_INLINE float32x4x4_t Ocean::CV::NEON::cast16ElementsNEON ( const uint8x16_t & source_u_8x16 )

static

Casts 16 uint8_t elements to 16 float elements.

Parameters

source_u_8x16 The 16 uint8_t elements, must be valid

Returns: The resulting 16 float elements

◆ combineLowBits16x8to8x16()

OCEAN_FORCE_INLINE uint8x16_t Ocean::CV::NEON::combineLowBits16x8to8x16	(	const uint16x8_t &	low,
		const uint16x8_t &	high
	)

static

Combines sixteen 16 bit values (holding 8 bit information) to sixteen 8 bit values.

Further, the combination is done with saturation (the 16 bit values will be clamped to 8 bit values before the combination is done).
Given: 0H0G-0F0E-0D0C-0B0A (low)
Given: 0P0O-0N0M-0L0K-0J0I (high)
Result: P-O-N-M-L-K-J-I-H-G-F-E-D-C-B-A

Parameters

low	The 128 bit register with the (resulting) lower 8 bit values
high	The 128 bit register with the (resulting) higher 8 bit values

Returns: The resulting 128 bit register with 8 bit values

◆ combineLowBits32x4to16x8()

OCEAN_FORCE_INLINE uint16x8_t Ocean::CV::NEON::combineLowBits32x4to16x8	(	const uint32x4_t &	low,
		const uint32x4_t &	high
	)

static

Combines eight 32 bit values (holding 16 bit information) to eight 16 bit values.

Further, the combination is done with saturation (the 32 bit values will be clamped to 16 bit values before the combination is done).
Given: 00DD-00CC-00BB-00AA (low)
Given: 00HH-00GG-00FF-00EE (high)
Result: HH-GG-FF-EE-DD-CC-BB-AA

Parameters

low	The 128 bit register with the (resulting) lower 16 bit values
high	The 128 bit register with the (resulting) higher 16 bit values

Returns: The resulting 128 bit register with 16 bit values

◆ copySign()

OCEAN_FORCE_INLINE int32x4_t Ocean::CV::NEON::copySign	(	const uint32x4_t &	signReceiver,
		const int32x4_t &	signProvider
	)

static

Copies the sign of a given value to another one.

Parameters

signReceiver	First value receiving the sign from the second value
signProvider	Second value providing the sign for the first one

Returns: First value with the sign of the second one

◆ gradientHorizontalVertical8Elements1Channel8Bit()

void Ocean::CV::NEON::gradientHorizontalVertical8Elements1Channel8Bit	(	const uint8_t *	source,
		int8_t *	response,
		const unsigned int	width
	)

inline static

Determines the horizontal and the vertical gradients for 8 following pixels for a given 1 channel 8 bit frame.

The resulting gradients are interleaved and each response is inside the range [-127, 127] as the standard response is divided by two.

Parameters

source	The source position of the first pixel to determine the gradient for, this pixel must not be a border pixel in the original frame
response	Resulting gradient responses, first the horizontal response then the vertical response (zipped) for 8 pixels
width	The width of the original frame in pixel, with range [10, infinity)

◆ gradientHorizontalVertical8Elements3Products1Channel8Bit()

void Ocean::CV::NEON::gradientHorizontalVertical8Elements3Products1Channel8Bit	(	const uint8_t *	source,
		int16_t *	response,
		const unsigned int	width
	)

inline static

Determines the squared horizontal and vertical gradients and the product of both gradients for 8 following pixels for a given 1 channel 8 bit frame.

The resulting gradients are interleaved and each response is inside the range [-(127 * 127), 127 * 127] as the standard response is divided by two.

Parameters

source	The source position of the first pixel to determine the gradient for, this pixel must not be a border pixel in the original frame
response	Resulting gradient responses, first the horizontal response then the vertical response and afterwards the product of horizontal and vertical response (zipped) for 8 pixels
width	The width of the original frame in pixel, with range [10, infinity)

◆ interpolation2Channel16Bit1x1()

unsigned int Ocean::CV::NEON::interpolation2Channel16Bit1x1	(	const uint8_t *const	pixel,
		const unsigned int	size,
		const unsigned int	fx_y_,
		const unsigned int	fxy_,
		const unsigned int	fx_y,
		const unsigned int	fxy
	)

inline static private

Returns the interpolated pixel values for one 2 channel 16 bit pixel.

Parameters

pixel	Upper left pixel in the frame
size	Size of one frame row in bytes
fx_y_	Product of the inverse fx and the inverse fy interpolation factor
fxy_	Product of the fx and the inverse fy interpolation factor
fx_y	Product of the inverse fx and the fy interpolation factor
fxy	Product of the fx and the fy interpolation factor

Returns: Interpolated pixel values

◆ moveHighBits16_8() [1/2]

OCEAN_FORCE_INLINE uint16x4_t Ocean::CV::NEON::moveHighBits16_8 ( const uint16x4_t & value )

static

Moves the high 8 bits of four 16 bit elements to the low 8 bits and fill the high bits with 0.

Given: HGFE-DCBA
Result: 0H0F-0D0B

Parameters

value The value whose high 8 bits are moved to the low 8 bits

Returns: Result

◆ moveHighBits16_8() [2/2]

OCEAN_FORCE_INLINE uint16x8_t Ocean::CV::NEON::moveHighBits16_8 ( const uint16x8_t & value )

static

Moves the high 8 bits of eight 16 bit elements to the low 8 bits and fill the high bits with 0.

Given: PONM-LKJI-HGFE-DCBA
Result: 0P0N-0L0J-0H0F-0D0B

Parameters

value The value whose high 8 bits are moved to the low 8 bits

Returns: Result

◆ moveHighBits32_16()

OCEAN_FORCE_INLINE uint32x4_t Ocean::CV::NEON::moveHighBits32_16 ( const uint32x4_t & value )

static

Moves the high 16 bits of four 32 bit elements to the low 16 bits and fill the high bits with 0.

Given: PONM-LKJI-HGFE-DCBA
Result: 00PO-00LK-00HG-00DC

Parameters

value The value whose high 16 bits are moved to the low 16 bits

Returns: Result

◆ multiply()

OCEAN_FORCE_INLINE uint64x2_t Ocean::CV::NEON::multiply	(	const uint64x2_t &	value_u_64x2,
		const uint32x2_t &	value_u_32x2
	)

static

Multiplies two uint64_t values with two uint32_t values and stores the results in two uint64_t values.

This function does not check whether the multiplication results in an overflow.

Parameters

value_u_64x2	The uint64x2_t value to multiply
value_u_32x2	The uint32x2_t value to multiply

Returns: The resulting multiplication result

◆ prefetchNTA()

void Ocean::CV::NEON::prefetchNTA ( const void *const data )

inline static

Prefetches a block of non-temporal memory into non-temporal cache structure.

Parameters

data	Data to be prefetched

◆ prefetchT0()

void Ocean::CV::NEON::prefetchT0 ( const void *const data )

inline static

Prefetches a block of temporal memory into all cache levels.

Parameters

data	Data to be prefetched

◆ prefetchT1()

void Ocean::CV::NEON::prefetchT1 ( const void *const data )

inline static

Prefetches a block of temporal memory in all cache levels except 0th cache level.

Parameters

data	Data to be prefetched

◆ prefetchT2()

void Ocean::CV::NEON::prefetchT2 ( const void *const data )

inline static

Prefetches a block of temporal memory in all cache levels, except 0th and 1st cache levels.

Parameters

data	Data to be prefetched

◆ removeHighBits16_8() [1/2]

OCEAN_FORCE_INLINE uint16x4_t Ocean::CV::NEON::removeHighBits16_8 ( const uint16x4_t & value )

static

Removes (sets to zero) the high 8 bits of four 16 bit elements.

Given: HGFE-DCBA
Result: 0G0E-0C0A

Parameters

value The value to remove the high bits for

Returns: Result

◆ removeHighBits16_8() [2/2]

OCEAN_FORCE_INLINE uint16x8_t Ocean::CV::NEON::removeHighBits16_8 ( const uint16x8_t & value )

static

Removes (sets to zero) the high 8 bits of eight 16 bit elements.

Given: PONM-LKJI-HGFE-DCBA
Result: 0O0M-0K0I-0G0E-0C0A

Parameters

value The value to remove the high bits for

Returns: Result

◆ removeHighBits32_16()

OCEAN_FORCE_INLINE uint32x4_t Ocean::CV::NEON::removeHighBits32_16 ( const uint32x4_t & value )

static

Removes (sets to zero) the high 16 bits of four 32 bit elements.

Given: PONM-LKJI-HGFE-DCBA
Result: 00NM-00JI-00FE-00BA

Parameters

value The value to remove the high bits for

Returns: Result

◆ ssd2Channel16Bit1x1() [1/2]

unsigned int Ocean::CV::NEON::ssd2Channel16Bit1x1	(	const uint8_t *const	pixel0,
		const uint8_t *const	pixel1,
		const unsigned int	size0,
		const unsigned int	size1,
		const unsigned int	f0x_y_,
		const unsigned int	f0xy_,
		const unsigned int	f0x_y,
		const unsigned int	f0xy,
		const unsigned int	f1x_y_,
		const unsigned int	f1xy_,
		const unsigned int	f1x_y,
		const unsigned int	f1xy
	)

inline static private

Returns the interpolated sum of square difference for one 2 channel 16 bit pixel.

Parameters

pixel0	Upper left pixel in the first frame
pixel1	Upper left pixel in the second frame
size0	Size of one frame row in bytes
size1	Size of one frame row in bytes
f0x_y_	Product of the inverse fx and the inverse fy interpolation factor for the first image
f0xy_	Product of the fx and the inverse fy interpolation factor for the first image
f0x_y	Product of the inverse fx and the fy interpolation factor for the first image
f0xy	Product of the fx and the fy interpolation factor for the first image
f1x_y_	Product of the inverse fx and the inverse fy interpolation factor for the second image
f1xy_	Product of the fx and the inverse fy interpolation factor for the second image
f1x_y	Product of the inverse fx and the fy interpolation factor for the second image
f1xy	Product of the fx and the fy interpolation factor for the second image

Returns: Interpolated sum of square difference

◆ ssd2Channel16Bit1x1() [2/2]

unsigned int Ocean::CV::NEON::ssd2Channel16Bit1x1	(	const uint8_t *const	pixel0,
		const uint8_t *const	pixel1,
		const unsigned int	size0,
		const unsigned int	size1,
		const unsigned int	f1x_y_,
		const unsigned int	f1xy_,
		const unsigned int	f1x_y,
		const unsigned int	f1xy
	)

inline static private

Returns the interpolated sum of square difference for one 2 channel 16 bit pixel.

Parameters

pixel0	Upper left pixel in the first frame
pixel1	Upper left pixel in the second frame
size0	Size of one row of the first frame in bytes
size1	Size of one row of the second frame in bytes
f1x_y_	Product of the inverse fx and the inverse fy interpolation factor for the second image
f1xy_	Product of the fx and the inverse fy interpolation factor for the second image
f1x_y	Product of the inverse fx and the fy interpolation factor for the second image
f1xy	Product of the fx and the fy interpolation factor for the second image

Returns: Interpolated sum of square difference

◆ sum16Bit4Blocks3x3()

OCEAN_FORCE_INLINE int32x4_t Ocean::CV::NEON::sum16Bit4Blocks3x3	(	const short *const	rowTop,
		const short *const	rowCenter,
		const short *const	rowBottom
	)

static

Determines the four sums of four successive (overlapping) 3x3 blocks of signed 16 bit integer values.

Parameters

rowTop	The top row containing 6 short values, must be valid
rowCenter	The center row containing 6 short values, must be valid
rowBottom	The bottom row containing 6 short values, must be valid

Returns: The resulting four sums of the four 3x3 blocks

◆ sum32x4ByLanes()

OCEAN_FORCE_INLINE unsigned int Ocean::CV::NEON::sum32x4ByLanes ( const uint32x4_t & value )

static

Sums the four 32 bit values and returns the result.

Beware: This function is slow due to the usage of the individual lanes; providing a large target buffer is much faster.

Parameters

value The value holding the four 32 bit values

Returns: Sum result

◆ sumAbsoluteDifference8Bit16Elements() [1/2]

uint32x4_t Ocean::CV::NEON::sumAbsoluteDifference8Bit16Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum absolute difference determination for 16 elements with 8 bit precision.

Parameters

image0	First 16 elements to determine the SAD for, may be non aligned
image1	Second 16 elements to determine the SAD for, may be non aligned

Returns: SAD result distributed over four terms of the sum

◆ sumAbsoluteDifference8Bit16Elements() [2/2]

uint32x4_t Ocean::CV::NEON::sumAbsoluteDifference8Bit16Elements	(	const uint8x16_t &	row0,
		const uint8x16_t &	row1
	)

inline static

Sum absolute difference determination for 16 elements with 8 bit precision.

Parameters

row0	First 16 elements to determine the SAD for
row1	Second 16 elements to determine the SAD for

Returns: SAD result distributed over four terms of the sum

◆ sumSquareDifference8Bit16Elements() [1/2]

uint32x4_t Ocean::CV::NEON::sumSquareDifference8Bit16Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square difference determination for 16 elements with 8 bit precision.

Parameters

image0	First 16 elements to determine the ssd for, may be non aligned
image1	Second 16 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifference8Bit16Elements() [2/2]

uint32x4_t Ocean::CV::NEON::sumSquareDifference8Bit16Elements	(	const uint8x16_t &	row0,
		const uint8x16_t &	row1
	)

inline static

Sum square difference determination for 16 elements with 8 bit precision.

Parameters

row0	First 16 elements to determine the ssd for
row1	Second 16 elements to determine the ssd for

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifference8Bit8Elements() [1/2]

uint32x4_t Ocean::CV::NEON::sumSquareDifference8Bit8Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square difference determination for 8 elements with 8 bit precision.

Parameters

image0	First 8 elements to determine the ssd for, may be non aligned
image1	Second 8 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifference8Bit8Elements() [2/2]

uint32x4_t Ocean::CV::NEON::sumSquareDifference8Bit8Elements	(	const uint8x8_t &	row0,
		const uint8x8_t &	row1
	)

inline static

Sum square difference determination for 8 elements with 8 bit precision.

Parameters

row0	First 8 elements to determine the ssd for
row1	Second 8 elements to determine the ssd for

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifference8BitFront10Elements()

uint32x4_t Ocean::CV::NEON::sumSquareDifference8BitFront10Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square difference determination for the first 10 elements of a 16-element buffer with 8 bit precision.

Parameters

image0	First 10 elements to determine the ssd for, may be non aligned
image1	Second 10 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifference8BitFront11Elements()

uint32x4_t Ocean::CV::NEON::sumSquareDifference8BitFront11Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square difference determination for the first 11 elements of a 16-element buffer with 8 bit precision.

Parameters

image0	First 11 elements to determine the ssd for, may be non aligned
image1	Second 11 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifference8BitFront12Elements()

uint32x4_t Ocean::CV::NEON::sumSquareDifference8BitFront12Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square difference determination for the first 12 elements of a 16-element buffer with 8 bit precision.

Parameters

image0	First 12 elements to determine the ssd for, may be non aligned
image1	Second 12 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifference8BitFront13Elements()

uint32x4_t Ocean::CV::NEON::sumSquareDifference8BitFront13Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square difference determination for the first 13 elements of a 16-element buffer with 8 bit precision.

Parameters

image0	First 13 elements to determine the ssd for, may be non aligned
image1	Second 13 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifference8BitFront14Elements()

uint32x4_t Ocean::CV::NEON::sumSquareDifference8BitFront14Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square difference determination for the first 14 elements of a 16-element buffer with 8 bit precision.

Parameters

image0	First 14 elements to determine the ssd for, may be non aligned
image1	Second 14 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifference8BitFront15Elements()

uint32x4_t Ocean::CV::NEON::sumSquareDifference8BitFront15Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square difference determination for the first 15 elements of a 16-element buffer with 8 bit precision.

Parameters

image0	First 15 elements to determine the ssd for, may be non aligned
image1	Second 15 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifference8BitFront9Elements()

uint32x4_t Ocean::CV::NEON::sumSquareDifference8BitFront9Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square difference determination for the first 9 elements of a 16-element buffer with 8 bit precision.

Parameters

image0	First 9 elements to determine the ssd for, may be non aligned
image1	Second 9 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifferences8BitBack10Elements()

uint32x4_t Ocean::CV::NEON::sumSquareDifferences8BitBack10Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square differences determination for the last 10 elements of a 16-element buffer with 8 bit precision.

Parameters

image0	First 10 elements to determine the ssd for, may be non aligned
image1	Second 10 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifferences8BitBack11Elements()

uint32x4_t Ocean::CV::NEON::sumSquareDifferences8BitBack11Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square differences determination for the last 11 elements of a 16-element buffer with 8 bit precision.

Parameters

image0	First 11 elements to determine the ssd for, may be non aligned
image1	Second 11 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifferences8BitBack12Elements()

uint32x4_t Ocean::CV::NEON::sumSquareDifferences8BitBack12Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square differences determination for the last 12 elements of a 16-element buffer with 8 bit precision.

Parameters

image0	First 12 elements to determine the ssd for, may be non aligned
image1	Second 12 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifferences8BitBack13Elements()

uint32x4_t Ocean::CV::NEON::sumSquareDifferences8BitBack13Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square differences determination for the last 13 elements of a 16-element buffer with 8 bit precision.

Parameters

image0	First 13 elements to determine the ssd for, may be non aligned
image1	Second 13 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifferences8BitBack14Elements()

uint32x4_t Ocean::CV::NEON::sumSquareDifferences8BitBack14Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square differences determination for the last 14 elements of a 16-element buffer with 8 bit precision.

Parameters

image0	First 14 elements to determine the ssd for, may be non aligned
image1	Second 14 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifferences8BitBack15Elements()

uint32x4_t Ocean::CV::NEON::sumSquareDifferences8BitBack15Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square differences determination for the last 15 elements of a 16-element buffer with 8 bit precision.

Parameters

image0	First 15 elements to determine the ssd for, may be non aligned
image1	Second 15 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

◆ sumSquareDifferences8BitBack9Elements()

uint32x4_t Ocean::CV::NEON::sumSquareDifferences8BitBack9Elements	(	const uint8_t *const	image0,
		const uint8_t *const	image1
	)

inline static

Sum square differences determination for the last 9 elements of a 16-element buffer with 8 bit precision.

Parameters

image0	First 9 elements to determine the ssd for, may be non aligned
image1	Second 9 elements to determine the ssd for, may be non aligned

Returns: SSD result distributed over four terms of the sum

The documentation for this class was generated from the following file:

NEON.h

Static Public Member Functions

Static Private Member Functions

Detailed Description

Member Function Documentation

◆ average16Elements1Channel8Bit2x2()

◆ average16ElementsBinary1Channel8Bit2x2()

◆ average24Elements1Channel8Bit3x3()

◆ average32Elements1Channel8Bit2x2()

◆ average32Elements2Channel16Bit2x2()

◆ average48Elements1Channel8Bit3x3Approximation()

◆ average48Elements3Channel24Bit2x2()

◆ average64Elements2Channel16Bit2x2()

◆ average64Elements4Channel32Bit2x2()

◆ cast16ElementsNEON() [1/4]

◆ cast16ElementsNEON() [2/4]

◆ cast16ElementsNEON() [3/4]

◆ cast16ElementsNEON() [4/4]

◆ combineLowBits16x8to8x16()

◆ combineLowBits32x4to16x8()

◆ copySign()

◆ gradientHorizontalVertical8Elements1Channel8Bit()

◆ gradientHorizontalVertical8Elements3Products1Channel8Bit()

◆ interpolation2Channel16Bit1x1()

◆ moveHighBits16_8() [1/2]

◆ moveHighBits16_8() [2/2]

◆ moveHighBits32_16()

◆ multiply()

◆ prefetchNTA()

◆ prefetchT0()

◆ prefetchT1()

◆ prefetchT2()

◆ removeHighBits16_8() [1/2]

◆ removeHighBits16_8() [2/2]

◆ removeHighBits32_16()

◆ ssd2Channel16Bit1x1() [1/2]

◆ ssd2Channel16Bit1x1() [2/2]

◆ sum16Bit4Blocks3x3()

◆ sum32x4ByLanes()

◆ sumAbsoluteDifference8Bit16Elements() [1/2]

◆ sumAbsoluteDifference8Bit16Elements() [2/2]

◆ sumSquareDifference8Bit16Elements() [1/2]

◆ sumSquareDifference8Bit16Elements() [2/2]

◆ sumSquareDifference8Bit8Elements() [1/2]

◆ sumSquareDifference8Bit8Elements() [2/2]

◆ sumSquareDifference8BitFront10Elements()

◆ sumSquareDifference8BitFront11Elements()

◆ sumSquareDifference8BitFront12Elements()

◆ sumSquareDifference8BitFront13Elements()

◆ sumSquareDifference8BitFront14Elements()

◆ sumSquareDifference8BitFront15Elements()

◆ sumSquareDifference8BitFront9Elements()

◆ sumSquareDifferences8BitBack10Elements()

◆ sumSquareDifferences8BitBack11Elements()

◆ sumSquareDifferences8BitBack12Elements()

◆ sumSquareDifferences8BitBack13Elements()

◆ sumSquareDifferences8BitBack14Elements()

◆ sumSquareDifferences8BitBack15Elements()

◆ sumSquareDifferences8BitBack9Elements()