Ocean
Loading...
Searching...
No Matches
base/Processor.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_BASE_PROCESSOR_H
9#define META_OCEAN_BASE_PROCESSOR_H
10
11#include "ocean/base/Base.h"
13
14namespace Ocean
15{
16
17/**
18 * Definition of individual processor instruction types.
19 * @ingroup base
20 */
21enum ProcessorInstructions : uint32_t
22{
23 /// Unknown processor instruction set.
24 PI_NONE = 0u,
25 /// SSE instructions.
26 PI_SSE = 1u << 0u,
27 /// SSE2 instructions.
28 PI_SSE_2 = 1u << 1u,
29 /// SSE3 instructions.
30 PI_SSE_3 = 1u << 2u,
31 /// SSSE3 instructions.
32 PI_SSSE_3 = 1u << 3u,
33 /// SSE_4.1 instructions.
34 PI_SSE_4_1 = 1u << 4u,
35 /// SSE 4.2 instructions.
36 PI_SSE_4_2 = 1u << 5u,
37 /// Any SSE instructions.
39
40 /// AVX instructions.
41 PI_AVX = 1u << 6u,
42 /// AVX2 instructions.
43 PI_AVX_2 = 1u << 7u,
44 /// AVX 512 instructions.
45 PI_AVX_512 = 1u << 8u,
46 /// Any AVX instructions.
48
49 /// NEON instructions.
50 PI_NEON = 1 << 9u,
51 /// Any NEON instructions.
53
54 /// AES instructions.
55 PI_AES = 1 << 10u,
56
57 /// All SSE instructions between (including) SSE and SSE2.
59 /// All SSE instructions between (including) SSE and SSE4.1.
61 /// All AVX instructions between (including) AVX and AVX2.
63 /// All AVX instructions between (including) AVX and AVX2 and SSE instructions between (including) SSE and SSE2, e.g., for processors supporting SSSE3 but not SSE3.
65 /// All NEON instructions (which is currently NEON only).
67 /// All AVX instructions between (including) AVX and AVX2 and SSE instructions between (including) SSE and SSE4.1.
69};
70
71/**
72 * This helper class allows to determine, as a compile-time known boolean, whether a set of available instructions contains (includes) a specified (minimal requirement) instruction.
73 * See this tutorial:
74 * @code
75 * template <ProcessorInstructions tHighestInstructions>
76 * void function()
77 * {
78 * static_assert((ProcessorInstructionChecker<tHighestInstructions, PI_SSE_2>::value), "This function needs at least SSE2 instructions");
79 *
80 * if (ProcessorInstructionChecker<tHighestInstructions, PI_SSE_4_1>::value)
81 * {
82 * // place code needing (at most) SSE4.1 instructions here
83 * }
84 * else if (ProcessorInstructionChecker<tHighestInstructions, PI_SSE_2>::value)
85 * {
86 * // place an alternative code using (at most) SSE2 instructions here
87 * }
88 * }
89 * @endcode
90 * @ingroup base
91 * @tparam tHighestInstructions The set of available instructions, may be any combination of instructions
92 * @tparam tNecessaryInstruction The instruction that is required for a specific function (the minimal requirement), must be one specific instruction (not a set of several instructions)
93 */
94template <ProcessorInstructions tHighestInstructions, ProcessorInstructions tNecessaryInstruction>
96{
97 public:
98
99 /**
100 * True, if the requested instruction is part of the provided set of instructions.
101 * Here we disable the definition to ensure that the specialized template classes are used, as otherwise the parameter 'tNecessaryInstruction' covers a combination of several instructions.
102 */
103 // const static bool value = true;
104};
105
106/**
107 * This class implements basic functions relating to the system processor.
108 * @ingroup base
109 */
110class OCEAN_BASE_EXPORT Processor : public Singleton<Processor>
111{
112 friend class Singleton<Processor>;
113
114 public:
115
116 /**
117 * Returns the number of available processor cores.
118 * If an explicit number of processors has been forced by the user, the user defined number will be returned.
119 * @return Number of processor cores
120 * @see realCores(), forceCores().
121 */
122 inline unsigned int cores() const;
123
124 /**
125 * Returns the supported instruction set of the processor.
126 * If an explicit instruction set has been forced by the user, the user defined instruction set will be returned.
127 * @return Instruction set of the processor
128 * @see realInstructions(), forceInstructions().
129 */
130 inline ProcessorInstructions instructions();
131
132 /**
133 * Forces a user defined number of processor cores.
134 * The forced number will be returned instead of the real cores using the cores() function.
135 * @param cores Number of cores to be forced, 0 to remove the previously forced core number
136 * @return True, if succeeded
137 * @see cores().
138 */
139 bool forceCores(const unsigned int cores);
140
141 /**
142 * Forces a user-defined processor instruction set.
143 * The forced instruction set will be returned instead of the real instruction set using the instructions() function.
144 * @param instructions The instruction set to be forced, -1 to remove the previously forced instruction set
145 * @return True, if succeeded
146 * @see instructions().
147 */
148 bool forceInstructions(const ProcessorInstructions instructions);
149
150 /**
151 * Returns the processor's brand.
152 * @return The processor's brand
153 */
154 static std::string brand();
155
156 /**
157 * Returns the number of available processor cores currently detectable.
158 * @return Number of processor cores
159 * @see cores().
160 */
161 static unsigned int realCores();
162
163 /**
164 * Returns the supported instruction set of the processor.
165 * @return The supported set of instructions
166 * @see instructions().
167 */
169
170 /**
171 * Translates a set of processor instructions to a readable string.
172 * @param instructions The instructions to be translated
173 * @return The resulting string containing the instruction names, 'No SIMD Instructions' if no instruction is specified
174 */
175 static std::string translateInstructions(const ProcessorInstructions instructions);
176
177 /**
178 * Returns the best group of instructions value for a set of given processor instructions.
179 * The function may return the following groups in the following order: PI_GROUP_AVX_2_SSE_4_1, PI_GROUP_SSE_4_1, PI_GROUP_AVX_2_SSE_2, PI_GROUP_AVX_2, PI_GROUP_SSE_2, PI_GROUP_NEON.
180 * @param instructions The set of instructions for which the best group will be returned
181 * @return The best group of instructions, PI_NONE if no group is matching
182 * @tparam tIndependentOfBinary True, to return the best group without checking the binaries capabilities; False, to return groups which are supported by the current binary only
183 */
184 template <bool tIndependentOfBinary>
185 static inline ProcessorInstructions bestInstructionGroup(const ProcessorInstructions instructions);
186
187 /**
188 * Returns whether the processor/system is using the little endian convention (like e.g., x86) or whether the big endian convention is used.
189 * @return True, if the little endian convention is used
190 */
191 static inline bool isLittleEndian();
192
193 /**
194 * Returns the current value of the ARM virtual count register cntvct_el0/cntpct_el0.
195 * The value returned is a monotonically increasing counter, typically used for high-resolution timing and performance measurements.<br>
196 * To convert the counter value to seconds, divide the difference between two readings by the system counter frequency.<br>
197 * The virtual count value is equal to the physical count value minus the virtual offset visible in cntvoff_el2.
198 * @param counter The resulting counter value
199 * @return True, if succeeded
200 * @see virtualCountFrequency().
201 */
202 static bool virtualCountRegister(uint64_t& counter);
203
204 /**
205 * Returns the frequency of the ARM virtual count register cntvct_el0/cntpct_el0 from the system counter frequency register cntfrq_el0.
206 * @param frequency The resulting frequency in Hz
207 * @return True, if succeeded
208 */
209 static bool virtualCountFrequency(uint64_t& frequency);
210
211 private:
212
213 /**
214 * Constructs a new processor object.
215 */
217
218#if defined(__APPLE__)
219
220 /**
221 * Returns the number of available processor cores currently detectable.
222 * @return Number of processor cores
223 * @see realCores().
224 */
225 static unsigned int realCoresApple();
226
227 #if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE==1
228
229 /**
230 * Returns the device name of the Apple iOS device.
231 * @return The device name
232 */
233 static std::string deviceModelAppleIOS();
234
235 #endif // TARGET_OS_IPHONE==1
236
237#endif // __APPLE__
238
239 /**
240 * Returns invalid processor instructions.
241 * @return Invalid instructions
242 */
243 static constexpr ProcessorInstructions invalidProcessorInstructions();
244
245 private:
246
247 /// Explicitly forced number of processor cores.
248 unsigned int forcedCores_ = 0u;
249
250 /// Explicitly forced CPU instructions.
251 ProcessorInstructions forcedProcessorInstructions_ = invalidProcessorInstructions();
252
253 /// The real instructions of the processor.
254 ProcessorInstructions processorInstructions_ = invalidProcessorInstructions();
255
256 /// The lock of the processor class.
257 mutable Lock lock_;
258};
259
260inline unsigned int Processor::cores() const // returns the forced core count if one is set, otherwise the detected core count
261{
262 const ScopedLock scopedLock(lock_); // hold the processor lock for the whole function, including the realCores() call below
263
264 if (forcedCores_ > 0u) // 0 is the default and means no core count is forced (see forceCores())
265 {
266 return forcedCores_;
267 }
268
269 return realCores(); // nothing forced: report the currently detectable number of cores
270}
271
284
285/**
286 * Specialization for the required instruction PI_SSE.
287 * @see ProcessorInstructionChecker.
288 * @ingroup base
289 */
290template <ProcessorInstructions tHighestInstructions>
291class ProcessorInstructionChecker<tHighestInstructions, PI_SSE>
292{
293 public:
294
295 /// True, if tHighestInstructions masked with PI_SSE_ANY is numerically >= PI_SSE, i.e., PI_SSE or any higher masked bit is set (not an exact bit test).
296 constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE;
297};
298
299/**
300 * Specialization for the required instruction PI_SSE_2.
301 * @see ProcessorInstructionChecker.
302 * @ingroup base
303 */
304template <ProcessorInstructions tHighestInstructions>
305class ProcessorInstructionChecker<tHighestInstructions, PI_SSE_2>
306{
307 public:
308
309 /// True, if tHighestInstructions masked with PI_SSE_ANY is numerically >= PI_SSE_2, i.e., PI_SSE_2 or any higher masked bit is set (not an exact bit test).
310 constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE_2;
311};
312
313/**
314 * Specialization for the required instruction PI_SSE_3.
315 * @see ProcessorInstructionChecker.
316 * @ingroup base
317 */
318template <ProcessorInstructions tHighestInstructions>
319class ProcessorInstructionChecker<tHighestInstructions, PI_SSE_3>
320{
321 public:
322
323 /// True, if tHighestInstructions masked with PI_SSE_ANY is numerically >= PI_SSE_3, i.e., PI_SSE_3 or any higher masked bit is set (not an exact bit test).
324 constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE_3;
325};
326
327/**
328 * Specialization for the required instruction PI_SSSE_3.
329 * @see ProcessorInstructionChecker.
330 * @ingroup base
331 */
332template <ProcessorInstructions tHighestInstructions>
333class ProcessorInstructionChecker<tHighestInstructions, PI_SSSE_3>
334{
335 public:
336
337 /// True, if tHighestInstructions masked with PI_SSE_ANY is numerically >= PI_SSSE_3, i.e., PI_SSSE_3 or any higher masked bit is set (not an exact bit test).
338 constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSSE_3;
339};
340
341/**
342 * Specialization for the required instruction PI_SSE_4_1.
343 * @see ProcessorInstructionChecker.
344 * @ingroup base
345 */
346template <ProcessorInstructions tHighestInstructions>
347class ProcessorInstructionChecker<tHighestInstructions, PI_SSE_4_1>
348{
349 public:
350
351 /// True, if tHighestInstructions masked with PI_SSE_ANY is numerically >= PI_SSE_4_1, i.e., PI_SSE_4_1 or any higher masked bit is set (not an exact bit test).
352 constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE_4_1;
353};
354
355/**
356 * Specialization for the required instruction PI_SSE_4_2.
357 * @see ProcessorInstructionChecker.
358 * @ingroup base
359 */
360template <ProcessorInstructions tHighestInstructions>
361class ProcessorInstructionChecker<tHighestInstructions, PI_SSE_4_2>
362{
363 public:
364
365 /// True, if tHighestInstructions masked with PI_SSE_ANY is numerically >= PI_SSE_4_2, i.e., PI_SSE_4_2 or any higher masked bit is set (not an exact bit test).
366 constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE_4_2;
367};
368
369/**
370 * Specialization for the required instruction PI_AVX.
371 * @see ProcessorInstructionChecker.
372 * @ingroup base
373 */
374template <ProcessorInstructions tHighestInstructions>
375class ProcessorInstructionChecker<tHighestInstructions, PI_AVX>
376{
377 public:
378
379 /// True, if tHighestInstructions masked with PI_AVX_ANY is numerically >= PI_AVX, i.e., PI_AVX or any higher masked bit is set (not an exact bit test).
380 constexpr static bool value = (tHighestInstructions & PI_AVX_ANY) >= PI_AVX;
381};
382
383/**
384 * Specialization for the required instruction PI_AVX_2.
385 * @see ProcessorInstructionChecker.
386 * @ingroup base
387 */
388template <ProcessorInstructions tHighestInstructions>
389class ProcessorInstructionChecker<tHighestInstructions, PI_AVX_2>
390{
391 public:
392
393 /// True, if tHighestInstructions masked with PI_AVX_ANY is numerically >= PI_AVX_2, i.e., PI_AVX_2 or any higher masked bit is set (not an exact bit test).
394 constexpr static bool value = (tHighestInstructions & PI_AVX_ANY) >= PI_AVX_2;
395};
396
397/**
398 * Specialization for the required instruction PI_AVX_512.
399 * @see ProcessorInstructionChecker.
400 * @ingroup base
401 */
402template <ProcessorInstructions tHighestInstructions>
403class ProcessorInstructionChecker<tHighestInstructions, PI_AVX_512>
404{
405 public:
406
407 /// True, if tHighestInstructions masked with PI_AVX_ANY is numerically >= PI_AVX_512, i.e., PI_AVX_512 or any higher masked bit is set (not an exact bit test).
408 constexpr static bool value = (tHighestInstructions & PI_AVX_ANY) >= PI_AVX_512;
409};
410
411/**
412 * Specialization for the required instruction PI_NEON.
413 * @see ProcessorInstructionChecker.
414 * @ingroup base
415 */
416template <ProcessorInstructions tHighestInstructions>
417class ProcessorInstructionChecker<tHighestInstructions, PI_NEON>
418{
419 public:
420
421 /// True, if tHighestInstructions masked with PI_NEON_ANY is numerically >= PI_NEON, i.e., PI_NEON or any higher masked bit is set (not an exact bit test).
422 constexpr static bool value = (tHighestInstructions & PI_NEON_ANY) >= PI_NEON;
423};
424
425template <bool tIndependentOfBinary>
427{
429 {
431 }
432
434 {
435 return PI_GROUP_SSE_4_1;
436 }
437
439 {
441 }
442
444 {
445 return PI_GROUP_AVX_2;
446 }
447
449 {
450 return PI_GROUP_SSE_2;
451 }
452
454 {
455 return PI_GROUP_NEON;
456 }
457
458 return PI_NONE;
459}
460
461template <>
462inline ProcessorInstructions Processor::bestInstructionGroup<false>(const ProcessorInstructions instructions)
463{
464#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41 && defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 20
466 {
468 }
469#endif
470
471#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
473 {
474 return PI_GROUP_SSE_4_1;
475 }
476#endif
477
478#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 20 && defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 20
480 {
482 }
483#endif
484
485#if defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 20
487 {
488 return PI_GROUP_AVX_2;
489 }
490#endif
491
492#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 20
494 {
495 return PI_GROUP_SSE_2;
496 }
497#endif
498
499#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
501 {
502 return PI_GROUP_NEON;
503 }
504#endif
505
506 OCEAN_SUPPRESS_UNUSED_WARNING(instructions);
507
508 return PI_NONE;
509}
510
512{
513 const int32_t littleEndianValue = 1;
514
515 const bool result = (*(int8_t*)(&littleEndianValue)) == int8_t(1);
516
517#ifdef OCEAN_LITTLE_ENDIAN
518 ocean_assert(result);
519#else
520 ocean_assert(!result);
521#endif
522
523 return result;
524}
525
530
531}
532
533#endif // META_OCEAN_BASE_PROCESSOR_H
This class implements a recursive lock object.
Definition Lock.h:31
This class implements basic functions relating the system processor.
Definition base/Processor.h:111
Lock lock_
The lock of the processor class.
Definition base/Processor.h:257
bool forceCores(const unsigned int cores)
Forces a user defined number of processor cores.
ProcessorInstructions instructions()
Returns the supported instruction set of the processor.
Definition base/Processor.h:272
ProcessorInstructions forcedProcessorInstructions_
Explicitly forced CPU instructions.
Definition base/Processor.h:251
static std::string deviceModelAppleIOS()
Returns the device name of the Apple iOS device.
static unsigned int realCores()
Returns the number of available processor cores currently detectable.
static ProcessorInstructions bestInstructionGroup(const ProcessorInstructions instructions)
Returns the best group of instructions value for a set of given processor instructions.
Definition base/Processor.h:426
ProcessorInstructions processorInstructions_
The real instructions of the processor.
Definition base/Processor.h:254
static ProcessorInstructions realInstructions()
Returns the supported instruction set of the processor.
bool forceInstructions(const ProcessorInstructions instructions)
Forces a user-defined processor instruction set.
static bool virtualCountRegister(uint64_t &counter)
Returns the current value of the ARM virtual count register cntvct_el0/cntpct_el0.
static std::string brand()
Returns the processor's brand.
static constexpr ProcessorInstructions invalidProcessorInstructions()
Returns invalid processor instructions.
Definition base/Processor.h:526
static std::string translateInstructions(const ProcessorInstructions instructions)
Translates a set of processor instructions to a readable string.
static bool virtualCountFrequency(uint64_t &frequency)
Returns the frequency of the ARM virtual count register cntvct_el0/cntpct_el0 from the system counter...
static bool isLittleEndian()
Returns whether the processor/system is using the little endian convention (like e....
Definition base/Processor.h:511
unsigned int forcedCores_
Explicitly forced number of processor cores.
Definition base/Processor.h:248
static unsigned int realCoresApple()
Returns the number of available processor cores currently detectable.
unsigned int cores() const
Returns the number of available processor cores.
Definition base/Processor.h:260
Processor()
Constructs a new processor object.
This helper class allows to determine a compile-time known boolean statement whether a set of availab...
Definition base/Processor.h:96
This class implements a scoped lock object for recursive lock objects.
Definition Lock.h:147
This template class is the base class for all singleton objects.
Definition Singleton.h:71
ProcessorInstructions
Definition of individual processor instruction types.
Definition base/Processor.h:22
@ PI_AVX
AVX instructions.
Definition base/Processor.h:41
@ PI_SSE_3
SSE3 instructions.
Definition base/Processor.h:30
@ PI_NONE
Unknown processor instruction set.
Definition base/Processor.h:24
@ PI_GROUP_AVX_2_SSE_2
All AVX instructions between (including) AVX and AVX2 and SSE instructions between (including) SSE an...
Definition base/Processor.h:64
@ PI_GROUP_AVX_2
All AVX instructions between (including) AVX and AVX2.
Definition base/Processor.h:62
@ PI_SSE_4_1
SSE_4.1 instructions.
Definition base/Processor.h:34
@ PI_GROUP_SSE_4_1
All SSE instructions between (including) SSE and SSE4.1.
Definition base/Processor.h:60
@ PI_NEON_ANY
Any NEON instructions.
Definition base/Processor.h:52
@ PI_SSE_4_2
SSE 4.2 instructions.
Definition base/Processor.h:36
@ PI_SSE_2
SSE2 instructions.
Definition base/Processor.h:28
@ PI_AVX_ANY
Any AVX instructions.
Definition base/Processor.h:47
@ PI_NEON
NEON instructions.
Definition base/Processor.h:50
@ PI_SSE_ANY
Any SSE instructions.
Definition base/Processor.h:38
@ PI_AVX_512
AVX 512 instructions.
Definition base/Processor.h:45
@ PI_AVX_2
AVX2 instructions.
Definition base/Processor.h:43
@ PI_SSE
SSE instructions.
Definition base/Processor.h:26
@ PI_GROUP_AVX_2_SSE_4_1
All AVX instructions between (including) AVX and AVX2 and SSE instructions between (including) SSE an...
Definition base/Processor.h:68
@ PI_GROUP_SSE_2
All SSE instructions between (including) SSE and SSE2.
Definition base/Processor.h:58
@ PI_GROUP_NEON
All NEON instructions (which is currently NEON only).
Definition base/Processor.h:66
@ PI_AES
AES instructions.
Definition base/Processor.h:55
@ PI_SSSE_3
SSSE3 instructions.
Definition base/Processor.h:32
The namespace covering the entire Ocean framework.
Definition Accessor.h:15