Ocean
Loading...
Searching...
No Matches
cv/detector/Descriptor.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_DETECTOR_DESCRIPTOR_H
9#define META_OCEAN_CV_DETECTOR_DESCRIPTOR_H
10
12
13#include "ocean/cv/NEON.h"
14#include "ocean/cv/SSE.h"
15
16#include <bitset>
17
18namespace Ocean
19{
20
21namespace CV
22{
23
24namespace Detector
25{
26
27/**
28 * This class implements the abstract base for arbitrary descriptors.
29 * It further provides static helpers to determine the hamming distance between binary descriptors.
30 */
31class OCEAN_CV_DETECTOR_EXPORT Descriptor
32{
33 public:
34
35 /**
36 * Creates a new descriptor object.
37 */
38 inline Descriptor();
39
40 /**
41 * Determines the hamming distance between two binary descriptors.
42 * @param descriptorA The first descriptor, must be valid and must provide at least tBits / 8 bytes
43 * @param descriptorB The second descriptor, must be valid and must provide at least tBits / 8 bytes
44 * @return The hamming distance between both descriptors (the number of not identical corresponding bits), with range [0, tBits]
45 * @tparam tBits The number of bits both descriptors have, with range [128, infinity), must be a multiple of 128
46 */
47 template <unsigned int tBits>
48 static OCEAN_FORCE_INLINE unsigned int calculateHammingDistance(const void* descriptorA, const void* descriptorB);
49
50 protected:
51
52#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 30
53
54 /**
55 * Calculates the pop count of an m128i register individually for each 8 bit group (byte).
56 * @param value Bit string to calculate pop count from
57 * @return The register holding one pop count per 8 bit group
58 */
59 static OCEAN_FORCE_INLINE __m128i popcount8(const __m128i value);
60
61 /**
62 * Calculates the pop count of an m128i register individually for each 64 bit group.
63 * @param value Bit string to calculate pop count from
64 * @return The register holding one pop count per 64 bit group
65 */
66 static OCEAN_FORCE_INLINE __m128i popcount64(const __m128i value);
67
68#endif
69
70#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 42
71
72 /**
73 * Calculates the pop count of an entire m128i register (all 128 bits).
74 * @param value Bit string to calculate pop count from
75 * @return The number of set bits in the register, with range [0, 128]
76 */
77 static OCEAN_FORCE_INLINE unsigned int popcount128(const __m128i value);
78
79#endif
80};
81
83{
84 // nothing to do here
85}
86
87template <>
88OCEAN_FORCE_INLINE unsigned int Descriptor::calculateHammingDistance<128u>(const void* descriptorA, const void* descriptorB)
89{
90#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 42
91
92 // the following code uses the following SSE instructions, and needs SSE4.2 or higher
93
94 // SSE2:
95 // _mm_lddqu_si128
96 // _mm_xor_si128
97
98 // see also popcount128()
99
100 const __m128i descriptorA_m128 = SSE::load128i(descriptorA);
101 const __m128i descriptorB_m128 = SSE::load128i(descriptorB);
102
103 const __m128i xor_m128 = _mm_xor_si128(descriptorA_m128, descriptorB_m128);
104
105 return popcount128(xor_m128);
106
107#elif defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 30
108
109 // the following code uses the following SSE instructions, and needs SSE3 or higher
110
111 // SSE2:
112 // _mm_load1_pd
113 // _mm_loadu_pd
114 // _mm_mul_pd
115 // _mm_add_pd
116 // _mm_storeu_pd
117
118 // see also popcount64()
119
120 const __m128i descriptorA_m128 = SSE::load128i(descriptorA);
121 const __m128i descriptorB_m128 = SSE::load128i(descriptorB);
122
123 const __m128i xor_m128 = _mm_xor_si128(descriptorA_m128, descriptorB_m128);
124
125 const __m128i countLowHigh_m128_64 = popcount64(xor_m128);
126 const __m128i countHigh_m128_64 = _mm_unpackhi_epi64(countLowHigh_m128_64, countLowHigh_m128_64);
127 const __m128i count_m128 = _mm_add_epi32(countLowHigh_m128_64, countHigh_m128_64);
128
129 return _mm_cvtsi128_si32(count_m128);
130
131#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
132
133 const uint8x16_t descriptorA_u_8x16 = vld1q_u8((const uint8_t*)(descriptorA));
134 const uint8x16_t descriptorB_u_8x16 = vld1q_u8((const uint8_t*)(descriptorB));
135
136 const uint8x16_t xor_u_8x16 = veorq_u8(descriptorA_u_8x16, descriptorB_u_8x16);
137
138 const uint8x16_t count_u_8x16 = vcntq_u8(xor_u_8x16);
139 const uint16x8_t count_u_16x8 = vpaddlq_u8(count_u_8x16);
140 const uint32x4_t count_u_32x4 = vpaddlq_u16(count_u_16x8);
141
142 return NEON::sum32x4ByLanes(count_u_32x4);
143
144#else
145
146 typedef std::bitset<128> Bitset;
147 static_assert(sizeof(Bitset) == 128u / 8u, "Invalid data type!");
148
149 Bitset bitsetA, bitsetB;
150
151 memcpy(&bitsetA, descriptorA, sizeof(Bitset));
152 memcpy(&bitsetB, descriptorB, sizeof(Bitset));
153
154 return (unsigned int)(bitsetA ^ bitsetB).count();
155
156#endif
157}
158
159template <>
160OCEAN_FORCE_INLINE unsigned int Descriptor::calculateHammingDistance<256u>(const void* descriptorA, const void* descriptorB)
161{
162#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 42
163
164 // the following code uses the following SSE instructions, and needs SSE4.2 or higher
165
166 // SSE2:
167 // _mm_lddqu_si128
168 // _mm_xor_si128
169
170 // see also popcount128()
171
172 const __m128i descriptorA_m128_0 = SSE::load128i(((const __m128i*)descriptorA) + 0);
173 const __m128i descriptorA_m128_1 = SSE::load128i(((const __m128i*)descriptorA) + 1);
174
175 const __m128i descriptorB_m128_0 = SSE::load128i(((const __m128i*)descriptorB) + 0);
176 const __m128i descriptorB_m128_1 = SSE::load128i(((const __m128i*)descriptorB) + 1);
177
178 const __m128i xor_m128_0 = _mm_xor_si128(descriptorA_m128_0, descriptorB_m128_0);
179 const __m128i xor_m128_1 = _mm_xor_si128(descriptorA_m128_1, descriptorB_m128_1);
180
181 return popcount128(xor_m128_0) + popcount128(xor_m128_1);
182
183#elif defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 30
184
185 // the following code uses the following SSE instructions, and needs SSE3 or higher
186
187 // SSE2:
188 // _mm_load1_pd
189 // _mm_loadu_pd
190 // _mm_mul_pd
191 // _mm_add_pd
192 // _mm_storeu_pd
193
194 // see also popcount64()
195
196 const __m128i descriptorA_m128_0 = SSE::load128i(((const __m128i*)descriptorA) + 0);
197 const __m128i descriptorA_m128_1 = SSE::load128i(((const __m128i*)descriptorA) + 1);
198
199 const __m128i descriptorB_m128_0 = SSE::load128i(((const __m128i*)descriptorB) + 0);
200 const __m128i descriptorB_m128_1 = SSE::load128i(((const __m128i*)descriptorB) + 1);
201
202 const __m128i xor_m128_0 = _mm_xor_si128(descriptorA_m128_0, descriptorB_m128_0);
203 const __m128i xor_m128_1 = _mm_xor_si128(descriptorA_m128_1, descriptorB_m128_1);
204
205 const __m128i countLowHigh_m128_64_0 = popcount64(xor_m128_0);
206 const __m128i countHigh_m128_64_0 = _mm_unpackhi_epi64(countLowHigh_m128_64_0, countLowHigh_m128_64_0);
207 const __m128i count_m128_0 = _mm_add_epi32(countLowHigh_m128_64_0, countHigh_m128_64_0);
208 const unsigned int hammingDistance_0 = _mm_cvtsi128_si32(count_m128_0);
209
210 const __m128i countLowHigh_m128_64_1 = popcount64(xor_m128_1);
211 const __m128i countHigh_m128_64_1 = _mm_unpackhi_epi64(countLowHigh_m128_64_1, countLowHigh_m128_64_1);
212 const __m128i count_m128_1 = _mm_add_epi32(countLowHigh_m128_64_1, countHigh_m128_64_1);
213 const unsigned int hammingDistance_1 = _mm_cvtsi128_si32(count_m128_1);
214
215 return hammingDistance_0 + hammingDistance_1;
216
217#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
218
219 const uint8x16_t descriptorA_u_8x16_0 = vld1q_u8((const uint8_t*)(descriptorA) + 0);
220 const uint8x16_t descriptorA_u_8x16_1 = vld1q_u8((const uint8_t*)(descriptorA) + 16);
221
222 const uint8x16_t descriptorB_u_8x16_0 = vld1q_u8((const uint8_t*)(descriptorB) + 0);
223 const uint8x16_t descriptorB_u_8x16_1 = vld1q_u8((const uint8_t*)(descriptorB) + 16);
224
225 const uint8x16_t xor_u_8x16_0 = veorq_u8(descriptorA_u_8x16_0, descriptorB_u_8x16_0);
226 const uint8x16_t xor_u_8x16_1 = veorq_u8(descriptorA_u_8x16_1, descriptorB_u_8x16_1);
227
228 const uint8x16_t count_u_8x16 = vaddq_u8(vcntq_u8(xor_u_8x16_0), vcntq_u8(xor_u_8x16_1));
229 const uint16x8_t count_u_16x8 = vpaddlq_u8(count_u_8x16);
230 const uint32x4_t count_u_32x4 = vpaddlq_u16(count_u_16x8);
231
232 return NEON::sum32x4ByLanes(count_u_32x4);
233
234#else
235
236 typedef std::bitset<256> Bitset;
237 static_assert(sizeof(Bitset) == 256u / 8u, "Invalid data type!");
238
239 Bitset bitsetA, bitsetB;
240
241 memcpy(&bitsetA, descriptorA, sizeof(Bitset));
242 memcpy(&bitsetB, descriptorB, sizeof(Bitset));
243
244 return (unsigned int)(bitsetA ^ bitsetB).count();
245
246#endif
247}
248
249template <unsigned int tBits>
250OCEAN_FORCE_INLINE unsigned int Descriptor::calculateHammingDistance(const void* descriptorA, const void* descriptorB)
251{
252 static_assert(tBits >= 128u && tBits % 128u == 0u, "Invalid bit number!");
253
254#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 42
255
256 // the following code uses the following SSE instructions, and needs SSE4.2 or higher
257
258 // SSE2:
259 // _mm_lddqu_si128
260 // _mm_xor_si128
261
262 // see also popcount128()
263
264 unsigned int result = 0u;
265
266 for (unsigned int n = 0u; n < tBits / 128u; ++n)
267 {
268 const __m128i descriptorA_m128 = SSE::load128i(((const __m128i*)descriptorA) + n);
269 const __m128i descriptorB_m128 = SSE::load128i(((const __m128i*)descriptorB) + n);
270
271 const __m128i xor_m128 = _mm_xor_si128(descriptorA_m128, descriptorB_m128);
272
273 result += popcount128(xor_m128);
274 }
275
276 return result;
277
278#elif defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 30
279
280 // the following code uses the following SSE instructions, and needs SSE3 or higher
281
282 // SSE2:
283 // _mm_load1_pd
284 // _mm_loadu_pd
285 // _mm_mul_pd
286 // _mm_add_pd
287 // _mm_storeu_pd
288
289 // see also popcount64()
290
291 unsigned int result = 0u;
292
293 for (unsigned int n = 0u; n < tBits / 128u; ++n)
294 {
295 const __m128i descriptorA_m128 = SSE::load128i(((const __m128i*)descriptorA) + n);
296 const __m128i descriptorB_m128 = SSE::load128i(((const __m128i*)descriptorB) + n);
297
298 const __m128i xor_m128 = _mm_xor_si128(descriptorA_m128, descriptorB_m128);
299
300 const __m128i countLowHigh_m128_64 = popcount64(xor_m128);
301 const __m128i countHigh_m128_64 = _mm_unpackhi_epi64(countLowHigh_m128_64, countLowHigh_m128_64);
302 const __m128i count_m128 = _mm_add_epi32(countLowHigh_m128_64, countHigh_m128_64);
303
304 result += _mm_cvtsi128_si32(count_m128);
305 }
306
307 return result;
308
309#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
310
311 uint32x4_t result_u_32x4 = vdupq_n_u32(0u);
312
313 for (unsigned int n = 0u; n < tBits / 128u; ++n)
314 {
315 const uint8x16_t descriptorA_u_8x16 = vld1q_u8((const uint8_t*)(descriptorA) + 16u * n);
316 const uint8x16_t descriptorB_u_8x16 = vld1q_u8((const uint8_t*)(descriptorB) + 16u * n);
317
318 const uint8x16_t xor_u_8x16 = veorq_u8(descriptorA_u_8x16, descriptorB_u_8x16);
319
320 const uint8x16_t count_u_8x16 = vcntq_u8(xor_u_8x16);
321 const uint16x8_t count_u_16x8 = vpaddlq_u8(count_u_8x16);
322 const uint32x4_t count_u_32x4 = vpaddlq_u16(count_u_16x8);
323
324 result_u_32x4 = vaddq_u32(result_u_32x4, count_u_32x4);
325 }
326
327 return NEON::sum32x4ByLanes(result_u_32x4);
328
329#else
330
331 typedef std::bitset<tBits> Bitset;
332 static_assert(sizeof(Bitset) == tBits / 8u, "Invalid data type!");
333
334 Bitset bitsetA, bitsetB;
335
336 memcpy(&bitsetA, descriptorA, sizeof(Bitset));
337 memcpy(&bitsetB, descriptorB, sizeof(Bitset));
338
339 return (unsigned int)(bitsetA ^ bitsetB).count();
340
341#endif
342}
343
344#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 30
345
346OCEAN_FORCE_INLINE __m128i Descriptor::popcount8(const __m128i value)
347{
348 // the following code uses the following SSE instructions, and needs SSE3 or higher
349
350 // SSE2:
351 // _mm_set1_epi8
352 // _mm_setr_epi8
353 // _mm_and_si128
354 // _mm_add_epi8
355
356 // SSE3:
357 // _mm_shuffle_epi8
358
359 const __m128i popcount_mask = _mm_set1_epi8(0x0F);
360 const __m128i popcount_table = _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);
361 const __m128i pcnt0 = _mm_shuffle_epi8(popcount_table, _mm_and_si128(value, popcount_mask));
362 const __m128i pcnt1 = _mm_shuffle_epi8(popcount_table, _mm_and_si128(_mm_srli_epi16(value, 4), popcount_mask));
363 return _mm_add_epi8(pcnt0, pcnt1);
364}
365
366OCEAN_FORCE_INLINE __m128i Descriptor::popcount64(const __m128i value)
367{
368 // the following code uses the following SSE instructions, and needs SSE3 or higher
369
370 // SSE2:
371 // _mm_sad_epu8
372 // _mm_setzero_si128
373
374 // see also popcount8()
375
376 const __m128i cnt8 = popcount8(value);
377 return _mm_sad_epu8(cnt8, _mm_setzero_si128());
378}
379
380#endif
381
382#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 42
383
384OCEAN_FORCE_INLINE unsigned int Descriptor::popcount128(const __m128i value)
385{
386 // the following code uses the following SSE instructions, and needs SSE4.2 or higher
387
388 // SSE2:
389 // _mm_cvtsi128_si64
390 // _mm_srli_si128
391
392 // SSE4.2:
393 // __popcnt64
394 // __popcnt
395
396#if defined(_WIN64) || TARGET_OS_MAC == 1
397
398 return (unsigned int)__popcnt64(_mm_cvtsi128_si64(value)) + (unsigned int)__popcnt64(_mm_cvtsi128_si64(_mm_srli_si128(value, 8)));
399
400#else
401
402 return (unsigned int)__popcnt(_mm_cvtsi128_si32(value)) + (unsigned int)__popcnt(_mm_cvtsi128_si32(_mm_srli_si128(value, 4))) + (unsigned int)__popcnt(_mm_cvtsi128_si32(_mm_srli_si128(value, 4))) + (unsigned int)__popcnt(_mm_cvtsi128_si32(_mm_srli_si128(value, 4)));
403
404#endif
405}
406
407#endif
408
409}
410
411}
412
413}
414
415#endif // META_OCEAN_CV_DETECTOR_DESCRIPTOR_H
This class implements the abstract base for arbitrary descriptors.
Definition cv/detector/Descriptor.h:32
static OCEAN_FORCE_INLINE __m128i popcount8(const __m128i value)
Calculates a pop count of an m128i register in 8 bit groups.
Definition cv/detector/Descriptor.h:346
static OCEAN_FORCE_INLINE unsigned int popcount128(const __m128i value)
Calculates the pop count of an entire m128i register (all 128 bits).
Definition cv/detector/Descriptor.h:384
Descriptor()
Creates a new descriptor object.
Definition cv/detector/Descriptor.h:82
static OCEAN_FORCE_INLINE __m128i popcount64(const __m128i value)
Calculates a pop count of an m128i register in 64 bit groups.
Definition cv/detector/Descriptor.h:366
static OCEAN_FORCE_INLINE unsigned int calculateHammingDistance(const void *descriptorA, const void *descriptorB)
Determines the hamming distance between two binary descriptors.
Definition cv/detector/Descriptor.h:250
static OCEAN_FORCE_INLINE unsigned int sum32x4ByLanes(const uint32x4_t &value)
Sums the four 32 bit values and returns the result.
Definition NEON.h:1085
static __m128i load128i(const void *const buffer)
Loads a 128i value from the memory.
Definition SSE.h:3619
The namespace covering the entire Ocean framework.
Definition Accessor.h:15