8 #ifndef META_OCEAN_CV_DETECTOR_DESCRIPTOR_H
9 #define META_OCEAN_CV_DETECTOR_DESCRIPTOR_H
/**
 * Determines the hamming distance between two binary descriptors.
 * The distance is the number of set bits in the XOR of both descriptors.
 * Explicit specializations for 128 and 256 bits exist; the generic implementation requires tBits to be a multiple of 128.
 * @param descriptorA Pointer to the memory of the first descriptor, tBits bits are read
 * @param descriptorB Pointer to the memory of the second descriptor, tBits bits are read
 * @return The number of bits in which both descriptors differ
 * @tparam tBits The number of bits each descriptor holds
 */
47 template <
unsigned int tBits>
48 static OCEAN_FORCE_INLINE
unsigned int calculateHammingDistance(
const void* descriptorA,
const void* descriptorB);
// The following pop count helpers are only declared when OCEAN_HARDWARE_SSE_VERSION is defined and at least 30.
52 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 30
/**
 * Calculates the pop count of an m128i register in 8 bit groups.
 * @param value The register whose set bits are counted
 * @return A register in which each of the 16 bytes holds the number of set bits of the corresponding input byte
 */
59 static OCEAN_FORCE_INLINE __m128i popcount8(
const __m128i value);
/**
 * Calculates the pop count of an m128i register in 64 bit groups.
 * @param value The register whose set bits are counted
 * @return A register holding one count per 64 bit half of the input; callers add the upper half to the lower half to obtain the count of all 128 bits
 */
66 static OCEAN_FORCE_INLINE __m128i popcount64(
const __m128i value);
// The scalar popcnt based helper is only declared when OCEAN_HARDWARE_SSE_VERSION is defined and at least 42.
70 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 42
/**
 * Calculates the pop count of an entire m128i register.
 * @param value The register whose set bits are counted
 * @return The number of set bits within all 128 bits of the register
 */
77 static OCEAN_FORCE_INLINE
unsigned int popcount128(
const __m128i value);
/**
 * Specialization of calculateHammingDistance() for descriptors holding 128 bits.
 * The implementation is selected at compile time, depending on the available SSE or NEON version;
 * a std::bitset based implementation is used in the portable path.
 * @param descriptorA Pointer to the first descriptor, 16 bytes are read
 * @param descriptorB Pointer to the second descriptor, 16 bytes are read
 * @return The number of bits in which both descriptors differ
 */
88 OCEAN_FORCE_INLINE
unsigned int Descriptor::calculateHammingDistance<128u>(
const void* descriptorA,
const void* descriptorB)
// SSE version 42 or higher
90 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 42
// the set bits of the XOR are exactly the bits in which both descriptors differ
103 const __m128i xor_m128 = _mm_xor_si128(descriptorA_m128, descriptorB_m128);
// SSE version 30 or higher, using popcount64()
107 #elif defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 30
// the set bits of the XOR are exactly the bits in which both descriptors differ
123 const __m128i xor_m128 = _mm_xor_si128(descriptorA_m128, descriptorB_m128);
// one count for the lower and one for the upper 64 bit half
125 const __m128i countLowHigh_m128_64 =
popcount64(xor_m128);
// copying the upper half to the lower half and adding both; each count is at most 64, so the lowest 32 bits hold the complete sum
126 const __m128i countHigh_m128_64 = _mm_unpackhi_epi64(countLowHigh_m128_64, countLowHigh_m128_64);
127 const __m128i count_m128 = _mm_add_epi32(countLowHigh_m128_64, countHigh_m128_64);
// the lowest 32 bit lane is the hamming distance
129 return _mm_cvtsi128_si32(count_m128);
// NEON version 10 or higher
131 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// loading 16 bytes of each descriptor
133 const uint8x16_t descriptorA_u_8x16 = vld1q_u8((
const uint8_t*)(descriptorA));
134 const uint8x16_t descriptorB_u_8x16 = vld1q_u8((
const uint8_t*)(descriptorB));
136 const uint8x16_t xor_u_8x16 = veorq_u8(descriptorA_u_8x16, descriptorB_u_8x16);
// counting the set bits per byte, then widening pairwise 8 -> 16 -> 32 bits, resulting in four 32 bit partial sums
138 const uint8x16_t count_u_8x16 = vcntq_u8(xor_u_8x16);
139 const uint16x8_t count_u_16x8 = vpaddlq_u8(count_u_8x16);
140 const uint32x4_t count_u_32x4 = vpaddlq_u16(count_u_16x8);
// portable std::bitset based path without SIMD intrinsics
146 typedef std::bitset<128> Bitset;
// the raw memcpy below is only valid if the bitset has exactly the size of the descriptor
147 static_assert(
sizeof(Bitset) == 128u / 8u,
"Invalid data type!");
149 Bitset bitsetA, bitsetB;
// copying the memory instead of casting the pointers, so the input pointers are never dereferenced as Bitset objects
151 memcpy(&bitsetA, descriptorA,
sizeof(Bitset));
152 memcpy(&bitsetB, descriptorB,
sizeof(Bitset));
154 return (
unsigned int)(bitsetA ^ bitsetB).count();
/**
 * Specialization of calculateHammingDistance() for descriptors holding 256 bits.
 * Each descriptor is processed as two 128 bit blocks; the implementation is selected at compile time,
 * depending on the available SSE or NEON version, with a std::bitset based portable path.
 * @param descriptorA Pointer to the first descriptor, 32 bytes are read
 * @param descriptorB Pointer to the second descriptor, 32 bytes are read
 * @return The number of bits in which both descriptors differ
 */
160 OCEAN_FORCE_INLINE
unsigned int Descriptor::calculateHammingDistance<256u>(
const void* descriptorA,
const void* descriptorB)
// SSE version 42 or higher
162 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 42
// loading the lower (+ 0) and the upper (+ 1) 128 bit block of each descriptor
172 const __m128i descriptorA_m128_0 =
SSE::load128i(((
const __m128i*)descriptorA) + 0);
173 const __m128i descriptorA_m128_1 =
SSE::load128i(((
const __m128i*)descriptorA) + 1);
175 const __m128i descriptorB_m128_0 =
SSE::load128i(((
const __m128i*)descriptorB) + 0);
176 const __m128i descriptorB_m128_1 =
SSE::load128i(((
const __m128i*)descriptorB) + 1);
// the set bits of the XOR are exactly the bits in which both descriptors differ
178 const __m128i xor_m128_0 = _mm_xor_si128(descriptorA_m128_0, descriptorB_m128_0);
179 const __m128i xor_m128_1 = _mm_xor_si128(descriptorA_m128_1, descriptorB_m128_1);
// SSE version 30 or higher, using popcount64()
183 #elif defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 30
// loading the lower (+ 0) and the upper (+ 1) 128 bit block of each descriptor
196 const __m128i descriptorA_m128_0 =
SSE::load128i(((
const __m128i*)descriptorA) + 0);
197 const __m128i descriptorA_m128_1 =
SSE::load128i(((
const __m128i*)descriptorA) + 1);
199 const __m128i descriptorB_m128_0 =
SSE::load128i(((
const __m128i*)descriptorB) + 0);
200 const __m128i descriptorB_m128_1 =
SSE::load128i(((
const __m128i*)descriptorB) + 1);
202 const __m128i xor_m128_0 = _mm_xor_si128(descriptorA_m128_0, descriptorB_m128_0);
203 const __m128i xor_m128_1 = _mm_xor_si128(descriptorA_m128_1, descriptorB_m128_1);
// first block: one count per 64 bit half, the upper half is added to the lower half, the lowest 32 bits hold the sum
205 const __m128i countLowHigh_m128_64_0 =
popcount64(xor_m128_0);
206 const __m128i countHigh_m128_64_0 = _mm_unpackhi_epi64(countLowHigh_m128_64_0, countLowHigh_m128_64_0);
207 const __m128i count_m128_0 = _mm_add_epi32(countLowHigh_m128_64_0, countHigh_m128_64_0);
208 const unsigned int hammingDistance_0 = _mm_cvtsi128_si32(count_m128_0);
// second block: same reduction as for the first block
210 const __m128i countLowHigh_m128_64_1 =
popcount64(xor_m128_1);
211 const __m128i countHigh_m128_64_1 = _mm_unpackhi_epi64(countLowHigh_m128_64_1, countLowHigh_m128_64_1);
212 const __m128i count_m128_1 = _mm_add_epi32(countLowHigh_m128_64_1, countHigh_m128_64_1);
213 const unsigned int hammingDistance_1 = _mm_cvtsi128_si32(count_m128_1);
// the distance of the entire descriptor is the sum of both block distances
215 return hammingDistance_0 + hammingDistance_1;
// NEON version 10 or higher
217 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// loading the first 16 bytes (+ 0) and the second 16 bytes (+ 16) of each descriptor
219 const uint8x16_t descriptorA_u_8x16_0 = vld1q_u8((
const uint8_t*)(descriptorA) + 0);
220 const uint8x16_t descriptorA_u_8x16_1 = vld1q_u8((
const uint8_t*)(descriptorA) + 16);
222 const uint8x16_t descriptorB_u_8x16_0 = vld1q_u8((
const uint8_t*)(descriptorB) + 0);
223 const uint8x16_t descriptorB_u_8x16_1 = vld1q_u8((
const uint8_t*)(descriptorB) + 16);
225 const uint8x16_t xor_u_8x16_0 = veorq_u8(descriptorA_u_8x16_0, descriptorB_u_8x16_0);
226 const uint8x16_t xor_u_8x16_1 = veorq_u8(descriptorA_u_8x16_1, descriptorB_u_8x16_1);
// the per-byte counts of both blocks are added while still 8 bit wide; each sum is at most 16 and cannot overflow
228 const uint8x16_t count_u_8x16 = vaddq_u8(vcntq_u8(xor_u_8x16_0), vcntq_u8(xor_u_8x16_1));
// widening pairwise 8 -> 16 -> 32 bits, resulting in four 32 bit partial sums
229 const uint16x8_t count_u_16x8 = vpaddlq_u8(count_u_8x16);
230 const uint32x4_t count_u_32x4 = vpaddlq_u16(count_u_16x8);
// portable std::bitset based path without SIMD intrinsics
236 typedef std::bitset<256> Bitset;
// the raw memcpy below is only valid if the bitset has exactly the size of the descriptor
237 static_assert(
sizeof(Bitset) == 256u / 8u,
"Invalid data type!");
239 Bitset bitsetA, bitsetB;
// copying the memory instead of casting the pointers, so the input pointers are never dereferenced as Bitset objects
241 memcpy(&bitsetA, descriptorA,
sizeof(Bitset));
242 memcpy(&bitsetB, descriptorB,
sizeof(Bitset));
244 return (
unsigned int)(bitsetA ^ bitsetB).count();
/**
 * Generic implementation of calculateHammingDistance() for descriptors holding a multiple of 128 bits.
 * The descriptors are processed in blocks of 128 bits; the implementation is selected at compile time,
 * depending on the available SSE or NEON version, with a std::bitset based portable path.
 * @tparam tBits The number of bits each descriptor holds, at least 128 and a multiple of 128
 */
249 template <
unsigned int tBits>
// descriptors which are not composed of full 128 bit blocks are rejected at compile time
252 static_assert(tBits >= 128u && tBits % 128u == 0u,
"Invalid bit number!");
// SSE version 42 or higher
254 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 42
264 unsigned int result = 0u;
// one iteration per 128 bit block
266 for (
unsigned int n = 0u; n < tBits / 128u; ++n)
268 const __m128i descriptorA_m128 =
SSE::load128i(((
const __m128i*)descriptorA) + n);
269 const __m128i descriptorB_m128 =
SSE::load128i(((
const __m128i*)descriptorB) + n);
// the set bits of the XOR are exactly the bits in which both blocks differ
271 const __m128i xor_m128 = _mm_xor_si128(descriptorA_m128, descriptorB_m128);
// SSE version 30 or higher, using popcount64()
278 #elif defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 30
291 unsigned int result = 0u;
// one iteration per 128 bit block
293 for (
unsigned int n = 0u; n < tBits / 128u; ++n)
295 const __m128i descriptorA_m128 =
SSE::load128i(((
const __m128i*)descriptorA) + n);
296 const __m128i descriptorB_m128 =
SSE::load128i(((
const __m128i*)descriptorB) + n);
298 const __m128i xor_m128 = _mm_xor_si128(descriptorA_m128, descriptorB_m128);
// one count per 64 bit half, the upper half is added to the lower half, the lowest 32 bits hold the sum of the block
300 const __m128i countLowHigh_m128_64 =
popcount64(xor_m128);
301 const __m128i countHigh_m128_64 = _mm_unpackhi_epi64(countLowHigh_m128_64, countLowHigh_m128_64);
302 const __m128i count_m128 = _mm_add_epi32(countLowHigh_m128_64, countHigh_m128_64);
// accumulating the distance of the current block
304 result += _mm_cvtsi128_si32(count_m128);
// NEON version 10 or higher
309 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// four 32 bit partial sums, accumulated over all blocks
311 uint32x4_t result_u_32x4 = vdupq_n_u32(0u);
// one iteration per 128 bit block (16 bytes)
313 for (
unsigned int n = 0u; n < tBits / 128u; ++n)
315 const uint8x16_t descriptorA_u_8x16 = vld1q_u8((
const uint8_t*)(descriptorA) + 16u * n);
316 const uint8x16_t descriptorB_u_8x16 = vld1q_u8((
const uint8_t*)(descriptorB) + 16u * n);
318 const uint8x16_t xor_u_8x16 = veorq_u8(descriptorA_u_8x16, descriptorB_u_8x16);
// counting the set bits per byte, then widening pairwise 8 -> 16 -> 32 bits before accumulating
320 const uint8x16_t count_u_8x16 = vcntq_u8(xor_u_8x16);
321 const uint16x8_t count_u_16x8 = vpaddlq_u8(count_u_8x16);
322 const uint32x4_t count_u_32x4 = vpaddlq_u16(count_u_16x8);
324 result_u_32x4 = vaddq_u32(result_u_32x4, count_u_32x4);
// portable std::bitset based path without SIMD intrinsics
331 typedef std::bitset<tBits> Bitset;
// the raw memcpy below is only valid if the bitset has exactly the size of the descriptor
332 static_assert(
sizeof(Bitset) == tBits / 8u,
"Invalid data type!");
334 Bitset bitsetA, bitsetB;
// copying the memory instead of casting the pointers, so the input pointers are never dereferenced as Bitset objects
336 memcpy(&bitsetA, descriptorA,
sizeof(Bitset));
337 memcpy(&bitsetB, descriptorB,
sizeof(Bitset));
339 return (
unsigned int)(bitsetA ^ bitsetB).count();
// Definitions of the pop count helpers guarded by OCEAN_HARDWARE_SSE_VERSION >= 30.
344 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 30
// Per-byte pop count based on a 16 entry lookup table which is applied to the low and to the high nibble of each byte.
// mask keeping the lower four bits of each byte
359 const __m128i popcount_mask = _mm_set1_epi8(0x0F);
// table entry i holds the number of set bits of the 4 bit value i
360 const __m128i popcount_table = _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);
// table lookup for the low nibble of each byte
361 const __m128i pcnt0 = _mm_shuffle_epi8(popcount_table, _mm_and_si128(value, popcount_mask));
// table lookup for the high nibble of each byte; the 16 bit shift also moves bits across byte borders, the mask removes them again
362 const __m128i pcnt1 = _mm_shuffle_epi8(popcount_table, _mm_and_si128(_mm_srli_epi16(value, 4), popcount_mask));
// each byte of the result holds the number of set bits of the corresponding input byte
363 return _mm_add_epi8(pcnt0, pcnt1);
// The sum of absolute differences against zero is the plain sum of the bytes, calculated separately for each 64 bit half;
// NOTE(review): cnt8 is presumably the result of popcount8(value) - confirm against the full definition.
377 return _mm_sad_epu8(cnt8, _mm_setzero_si128());
#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 42

/**
 * Calculates the pop count of an entire m128i register.
 * On 64 bit Windows and on Apple platforms the two 64 bit halves are counted with the 64 bit popcnt intrinsic,
 * otherwise the four 32 bit lanes are counted individually with the 32 bit popcnt intrinsic.
 * @param value The register whose set bits are counted
 * @return The number of set bits within all 128 bits of the register, with range [0, 128]
 */
OCEAN_FORCE_INLINE unsigned int Descriptor::popcount128(const __m128i value)
{
#if defined(_WIN64) || TARGET_OS_MAC == 1

	// counting the lower 64 bits, and the upper 64 bits after moving them down by 8 bytes
	return (unsigned int)__popcnt64(_mm_cvtsi128_si64(value)) + (unsigned int)__popcnt64(_mm_cvtsi128_si64(_mm_srli_si128(value, 8)));

#else

	// _mm_cvtsi128_si32() extracts the lowest 32 bit lane only, so every lane needs its own byte shift (0, 4, 8, 12);
	// using the same shift of 4 bytes three times would count lane 1 three times and skip lanes 2 and 3
	return (unsigned int)__popcnt(_mm_cvtsi128_si32(value))
			+ (unsigned int)__popcnt(_mm_cvtsi128_si32(_mm_srli_si128(value, 4)))
			+ (unsigned int)__popcnt(_mm_cvtsi128_si32(_mm_srli_si128(value, 8)))
			+ (unsigned int)__popcnt(_mm_cvtsi128_si32(_mm_srli_si128(value, 12)));

#endif
}

#endif // OCEAN_HARDWARE_SSE_VERSION >= 42
This class implements the abstract base for arbitrary descriptors.
Definition: cv/detector/Descriptor.h:32
static OCEAN_FORCE_INLINE __m128i popcount8(const __m128i value)
Calculates a pop count of an m128i register in 8 bit groups.
Definition: cv/detector/Descriptor.h:346
static OCEAN_FORCE_INLINE unsigned int popcount128(const __m128i value)
Calculates the pop count of an entire m128i register (all 128 bits).
Definition: cv/detector/Descriptor.h:384
Descriptor()
Creates a new descriptor object.
Definition: cv/detector/Descriptor.h:82
static OCEAN_FORCE_INLINE __m128i popcount64(const __m128i value)
Calculates a pop count of an m128i register in 64 bit groups.
Definition: cv/detector/Descriptor.h:366
static OCEAN_FORCE_INLINE unsigned int calculateHammingDistance(const void *descriptorA, const void *descriptorB)
Determines the hamming distance between two binary descriptors.
Definition: cv/detector/Descriptor.h:250
static OCEAN_FORCE_INLINE unsigned int sum32x4ByLanes(const uint32x4_t &value)
Sums the four 32 bit values and returns the result.
Definition: NEON.h:1085
static __m128i load128i(const void *const buffer)
Loads a 128i value from the memory.
Definition: SSE.h:3619
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15