Ocean
base/Processor.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 #ifndef META_OCEAN_BASE_PROCESSOR_H
9 #define META_OCEAN_BASE_PROCESSOR_H
10 
11 #include "ocean/base/Base.h"
12 #include "ocean/base/Singleton.h"
13 
14 namespace Ocean
15 {
16 
17 /**
18  * Definition of individual processor instruction types.
19  * @ingroup base
20  */
21 enum ProcessorInstructions : uint32_t
22 {
23  /// Unknown processor instruction set.
24  PI_NONE = 0u,
25  /// SSE instructions.
26  PI_SSE = 1u << 0u,
27  /// SSE2 instructions.
28  PI_SSE_2 = 1u << 1u,
29  /// SSE3 instructions.
30  PI_SSE_3 = 1u << 2u,
31  /// SSSE3 instructions.
32  PI_SSSE_3 = 1u << 3u,
33  /// SSE4.1 instructions.
34  PI_SSE_4_1 = 1u << 4u,
35  /// SSE4.2 instructions.
36  PI_SSE_4_2 = 1u << 5u,
37  /// Any SSE instructions.
39 
40  /// AVX instructions.
41  PI_AVX = 1u << 6u,
42  /// AVX2 instructions.
43  PI_AVX_2 = 1u << 7u,
44  /// AVX 512 instructions.
45  PI_AVX_512 = 1u << 8u,
46  /// Any AVX instructions.
48 
49  /// NEON instructions.
50  PI_NEON = 1 << 9u,
51  /// Any NEON instructions.
53 
54  /// AES instructions.
55  PI_AES = 1 << 10u,
56 
57  /// All SSE instructions between (including) SSE and SSE2.
59  /// All SSE instructions between (including) SSE and SSE4.1.
61  /// All AVX instructions between (including) AVX and AVX2.
63  /// All AVX instructions between (including) AVX and AVX2 and SSE instructions between (including) SSE and SSE2, e.g., for processors supporting SSSE3 but not SSE3.
65  /// All NEON instructions (which is currently NEON only).
67  /// All AVX instructions between (including) AVX and AVX2 and SSE instructions between (including) SSE and SSE4.1.
69 };
70 
71 /**
72  * This helper class determines, at compile time, whether a set of available instructions includes a specified (minimally required) instruction.
73  * See this tutorial:
74  * @code
75  * template <ProcessorInstructions tHighestInstructions>
76  * void function()
77  * {
78  * static_assert((ProcessorInstructionChecker<tHighestInstructions, PI_SSE_2>::value), "This function needs at least SSE2 instructions");
79  *
80  * if (ProcessorInstructionChecker<tHighestInstructions, PI_SSE_4_1>::value)
81  * {
82  * // place code needing (at most) SSE4.1 instructions here
83  * }
84  * else if (ProcessorInstructionChecker<tHighestInstructions, PI_SSE_2>::value)
85  * {
86  * // place an alternative code using (at most) SSE2 instructions here
87  * }
88  * }
89  * @endcode
90  * @ingroup base
91  * @tparam tHighestInstructions The set of available instructions, may be any combination of instructions
92  * @tparam tNecessaryInstruction The instruction that is required for a specific function (the minimal requirement), must be one specific instruction (not a set of several instructions)
93  */
94 template <ProcessorInstructions tHighestInstructions, ProcessorInstructions tNecessaryInstruction>
96 {
97  public:
98 
99  /**
100  * True, if the requested instruction is part of the provided set of instructions.
101  * Here we disable the definition to ensure that the specialized template classes are used, as otherwise the parameter 'tNecessaryInstruction' covers a combination of several instructions.
102  */
103  // const static bool value = true;
104 };
105 
106 /**
107  * This class implements basic functions related to the system processor.
108  * @ingroup base
109  */
110 class OCEAN_BASE_EXPORT Processor : public Singleton<Processor>
111 {
112  friend class Singleton<Processor>;
113 
114  public:
115 
116  /**
117  * Returns the number of available processor cores.
118  * If an explicit number of processors has been forced by the user, the user defined number will be returned.
119  * @return Number of processor cores
120  * @see realCores(), forceCores().
121  */
122  inline unsigned int cores() const;
123 
124  /**
125  * Returns the supported instruction set of the processor.
126  * If an explicit instruction set has been forced by the user, the user defined instruction set will be returned.
127  * @return Instruction set of the processor
128  * @see realInstructions(), forceInstructions().
129  */
130  inline ProcessorInstructions instructions();
131 
132  /**
133  * Forces a user defined number of processor cores.
134  * The forced number will be returned instead of the real cores using the cores() function.
135  * @param cores Number of cores to be forced, 0 to remove the previously forced core number
136  * @return True, if succeeded
137  * @see cores().
138  */
139  bool forceCores(const unsigned int cores);
140 
141  /**
142  * Forces a user-defined processor instruction set.
143  * The forced instruction set will be returned instead of the real instruction set using the instructions() function.
144  * @param instructions The instruction set to be forced, -1 to remove the previously forced instruction set
145  * @return True, if succeeded
146  * @see instructions().
147  */
148  bool forceInstructions(const ProcessorInstructions instructions);
149 
150  /**
151  * Returns the processor's brand.
152  * @return The processor's brand
153  */
154  static std::string brand();
155 
156  /**
157  * Returns the number of available processor cores currently detectable.
158  * @return Number of processor cores
159  * @see cores().
160  */
161  static unsigned int realCores();
162 
163  /**
164  * Returns the supported instruction set of the processor.
165  * @return The supported set of instructions
166  * @see instructions().
167  */
169 
170  /**
171  * Translates a set of processor instructions to a readable string.
172  * @param instructions The instructions to be translated
173  * @return The resulting string containing the instruction names, 'No SIMD Instructions' if no instruction is specified
174  */
175  static std::string translateInstructions(const ProcessorInstructions instructions);
176 
177  /**
178  * Returns the best group of instructions value for a set of given processor instructions.
179  * The function may return the following groups in the following order: PI_GROUP_AVX_2_SSE_4_1, PI_GROUP_SSE_4_1, PI_GROUP_AVX_2_SSE_2, PI_GROUP_AVX_2, PI_GROUP_SSE_2, PI_GROUP_NEON.
180  * @param instructions The set of instructions for which the best group will be returned
181  * @return The best group of instructions, PI_NONE if no group is matching
182  * @tparam tIndependentOfBinary True, to return the best group without checking the binary's capabilities; False, to return only groups which are supported by the current binary
183  */
184  template <bool tIndependentOfBinary>
185  static inline ProcessorInstructions bestInstructionGroup(const ProcessorInstructions instructions);
186 
187  /**
188  * Returns whether the processor/system is using the little endian convention (e.g., x86) or the big endian convention.
189  * @return True, if the little endian convention is used
190  */
191  static inline bool isLittleEndian();
192 
193  private:
194 
195  /**
196  * Constructs a new processor object.
197  */
199 
200 #if defined(__APPLE__)
201 
202  /**
203  * Returns the number of available processor cores currently detectable.
204  * @return Number of processor cores
205  * @see realCores().
206  */
207  static unsigned int realCoresApple();
208 
209  #if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE==1
210 
211  /**
212  * Returns the device name of the Apple iOS device.
213  * @return The device name
214  */
215  static std::string deviceModelAppleIOS();
216 
217  #endif // TARGET_OS_IPHONE==1
218 
219 #endif // __APPLE__
220 
221  /**
222  * Returns invalid processor instructions.
223  * @return Invalid instructions
224  */
225  static constexpr ProcessorInstructions invalidProcessorInstructions();
226 
227  private:
228 
229  /// Explicitly forced number of processor cores.
230  unsigned int forcedCores_ = 0u;
231 
232  /// Explicitly forced CPU instructions.
233  ProcessorInstructions forcedProcessorInstructions_ = invalidProcessorInstructions();
234 
235  /// The real instructions of the processor.
236  ProcessorInstructions processorInstructions_ = invalidProcessorInstructions();
237 
238  /// The lock of the processor class.
239  mutable Lock lock_;
240 };
241 
242 inline unsigned int Processor::cores() const
243 {
244  const ScopedLock scopedLock(lock_);
245 
246  if (forcedCores_ > 0u)
247  {
248  return forcedCores_;
249  }
250 
251  return realCores();
252 }
253 
255 {
256  const ScopedLock scopedLock(lock_);
257 
259  {
261  }
262 
264  return processorInstructions_;
265 }
266 
267 /**
268  * Specialization for one specific instruction.
269  * @see ProcessorInstructionChecker.
270  * @ingroup base
271  */
272 template <ProcessorInstructions tHighestInstructions>
273 class ProcessorInstructionChecker<tHighestInstructions, PI_SSE>
274 {
275  public:
276 
277  /// True, if the requested instruction is part of the provided set of instructions.
278  constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE;
279 };
280 
281 /**
282  * Specialization for one specific instruction.
283  * @see ProcessorInstructionChecker.
284  * @ingroup base
285  */
286 template <ProcessorInstructions tHighestInstructions>
287 class ProcessorInstructionChecker<tHighestInstructions, PI_SSE_2>
288 {
289  public:
290 
291  /// True, if the requested instruction is part of the provided set of instructions.
292  constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE_2;
293 };
294 
295 /**
296  * Specialization for one specific instruction.
297  * @see ProcessorInstructionChecker.
298  * @ingroup base
299  */
300 template <ProcessorInstructions tHighestInstructions>
301 class ProcessorInstructionChecker<tHighestInstructions, PI_SSE_3>
302 {
303  public:
304 
305  /// True, if the requested instruction is part of the provided set of instructions.
306  constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE_3;
307 };
308 
309 /**
310  * Specialization for one specific instruction.
311  * @see ProcessorInstructionChecker.
312  * @ingroup base
313  */
314 template <ProcessorInstructions tHighestInstructions>
315 class ProcessorInstructionChecker<tHighestInstructions, PI_SSSE_3>
316 {
317  public:
318 
319  /// True, if the requested instruction is part of the provided set of instructions.
320  constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSSE_3;
321 };
322 
323 /**
324  * Specialization for one specific instruction.
325  * @see ProcessorInstructionChecker.
326  * @ingroup base
327  */
328 template <ProcessorInstructions tHighestInstructions>
329 class ProcessorInstructionChecker<tHighestInstructions, PI_SSE_4_1>
330 {
331  public:
332 
333  /// True, if the requested instruction is part of the provided set of instructions.
334  constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE_4_1;
335 };
336 
337 /**
338  * Specialization for one specific instruction.
339  * @see ProcessorInstructionChecker.
340  * @ingroup base
341  */
342 template <ProcessorInstructions tHighestInstructions>
343 class ProcessorInstructionChecker<tHighestInstructions, PI_SSE_4_2>
344 {
345  public:
346 
347  /// True, if the requested instruction is part of the provided set of instructions.
348  constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE_4_2;
349 };
350 
351 /**
352  * Specialization for one specific instruction.
353  * @see ProcessorInstructionChecker.
354  * @ingroup base
355  */
356 template <ProcessorInstructions tHighestInstructions>
357 class ProcessorInstructionChecker<tHighestInstructions, PI_AVX>
358 {
359  public:
360 
361  /// True, if the requested instruction is part of the provided set of instructions.
362  constexpr static bool value = (tHighestInstructions & PI_AVX_ANY) >= PI_AVX;
363 };
364 
365 /**
366  * Specialization for one specific instruction.
367  * @see ProcessorInstructionChecker.
368  * @ingroup base
369  */
370 template <ProcessorInstructions tHighestInstructions>
371 class ProcessorInstructionChecker<tHighestInstructions, PI_AVX_2>
372 {
373  public:
374 
375  /// True, if the requested instruction is part of the provided set of instructions.
376  constexpr static bool value = (tHighestInstructions & PI_AVX_ANY) >= PI_AVX_2;
377 };
378 
379 /**
380  * Specialization for one specific instruction.
381  * @see ProcessorInstructionChecker.
382  * @ingroup base
383  */
384 template <ProcessorInstructions tHighestInstructions>
385 class ProcessorInstructionChecker<tHighestInstructions, PI_AVX_512>
386 {
387  public:
388 
389  /// True, if the requested instruction is part of the provided set of instructions.
390  constexpr static bool value = (tHighestInstructions & PI_AVX_ANY) >= PI_AVX_512;
391 };
392 
393 /**
394  * Specialization for one specific instruction.
395  * @see ProcessorInstructionChecker.
396  * @ingroup base
397  */
398 template <ProcessorInstructions tHighestInstructions>
399 class ProcessorInstructionChecker<tHighestInstructions, PI_NEON>
400 {
401  public:
402 
403  /// True, if the requested instruction is part of the provided set of instructions.
404  constexpr static bool value = (tHighestInstructions & PI_NEON_ANY) >= PI_NEON;
405 };
406 
407 template <bool tIndependentOfBinary>
409 {
411  {
412  return PI_GROUP_AVX_2_SSE_4_1;
413  }
414 
416  {
417  return PI_GROUP_SSE_4_1;
418  }
419 
421  {
422  return PI_GROUP_AVX_2_SSE_2;
423  }
424 
426  {
427  return PI_GROUP_AVX_2;
428  }
429 
431  {
432  return PI_GROUP_SSE_2;
433  }
434 
436  {
437  return PI_GROUP_NEON;
438  }
439 
440  return PI_NONE;
441 }
442 
443 template <>
444 inline ProcessorInstructions Processor::bestInstructionGroup<false>(const ProcessorInstructions instructions)
445 {
446 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41 && defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 20
448  {
449  return PI_GROUP_AVX_2_SSE_4_1;
450  }
451 #endif
452 
453 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
455  {
456  return PI_GROUP_SSE_4_1;
457  }
458 #endif
459 
460 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 20 && defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 20
462  {
463  return PI_GROUP_AVX_2_SSE_2;
464  }
465 #endif
466 
467 #if defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 20
469  {
470  return PI_GROUP_AVX_2;
471  }
472 #endif
473 
474 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 20
476  {
477  return PI_GROUP_SSE_2;
478  }
479 #endif
480 
481 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
483  {
484  return PI_GROUP_NEON;
485  }
486 #endif
487 
488  OCEAN_SUPPRESS_UNUSED_WARNING(instructions);
489 
490  return PI_NONE;
491 }
492 
494 {
495  const int32_t littleEndianValue = 1;
496 
497  const bool result = (*(int8_t*)(&littleEndianValue)) == int8_t(1);
498 
499 #ifdef OCEAN_LITTLE_ENDIAN
500  ocean_assert(result);
501 #else
502  ocean_assert(!result);
503 #endif
504 
505  return result;
506 }
507 
509 {
510  return ProcessorInstructions(-1);
511 }
512 
513 }
514 
515 #endif // META_OCEAN_BASE_PROCESSOR_H
This class implements a recursive lock object.
Definition: Lock.h:31
This class implements basic functions related to the system processor.
Definition: base/Processor.h:111
Lock lock_
The lock of the processor class.
Definition: base/Processor.h:239
bool forceCores(const unsigned int cores)
Forces a user defined number of processor cores.
ProcessorInstructions instructions()
Returns the supported instruction set of the processor.
Definition: base/Processor.h:254
ProcessorInstructions forcedProcessorInstructions_
Explicitly forced CPU instructions.
Definition: base/Processor.h:233
static std::string deviceModelAppleIOS()
Returns the device name of the Apple iOS device.
static unsigned int realCores()
Returns the number of available processor cores currently detectable.
static ProcessorInstructions bestInstructionGroup(const ProcessorInstructions instructions)
Returns the best group of instructions value for a set of given processor instructions.
Definition: base/Processor.h:408
ProcessorInstructions processorInstructions_
The real instructions of the processor.
Definition: base/Processor.h:236
static ProcessorInstructions realInstructions()
Returns the supported instruction set of the processor.
bool forceInstructions(const ProcessorInstructions instructions)
Forces a user-defined processor instruction set.
static std::string brand()
Returns the processor's brand.
static constexpr ProcessorInstructions invalidProcessorInstructions()
Returns invalid processor instructions.
Definition: base/Processor.h:508
static std::string translateInstructions(const ProcessorInstructions instructions)
Translates a set of processor instructions to a readable string.
static bool isLittleEndian()
Returns whether the processor/system is using the little endian convention (like e....
Definition: base/Processor.h:493
unsigned int forcedCores_
Explicitly forced number of processor cores.
Definition: base/Processor.h:230
static unsigned int realCoresApple()
Returns the number of available processor cores currently detectable.
unsigned int cores() const
Returns the number of available processor cores.
Definition: base/Processor.h:242
Processor()
Constructs a new processor object.
This helper class allows to determine a compile-time known boolean statement whether a set of availab...
Definition: base/Processor.h:96
This class implements a scoped lock object for recursive lock objects.
Definition: Lock.h:135
This template class is the base class for all singleton objects.
Definition: Singleton.h:71
ProcessorInstructions
Definition of individual processor instruction types.
Definition: base/Processor.h:22
@ PI_AVX
AVX instructions.
Definition: base/Processor.h:41
@ PI_SSE_3
SSE3 instructions.
Definition: base/Processor.h:30
@ PI_NONE
Unknown processor instruction set.
Definition: base/Processor.h:24
@ PI_GROUP_AVX_2_SSE_2
All AVX instructions between (including) AVX and AVX2 and SSE instructions between (including) SSE an...
Definition: base/Processor.h:64
@ PI_GROUP_AVX_2
All AVX instructions between (including) AVX and AVX2.
Definition: base/Processor.h:62
@ PI_SSE_4_1
SSE4.1 instructions.
Definition: base/Processor.h:34
@ PI_GROUP_SSE_4_1
All SSE instructions between (including) SSE and SSE4.1.
Definition: base/Processor.h:60
@ PI_NEON_ANY
Any NEON instructions.
Definition: base/Processor.h:52
@ PI_SSE_4_2
SSE 4.2 instructions.
Definition: base/Processor.h:36
@ PI_SSE_2
SSE2 instructions.
Definition: base/Processor.h:28
@ PI_AVX_ANY
Any AVX instructions.
Definition: base/Processor.h:47
@ PI_NEON
NEON instructions.
Definition: base/Processor.h:50
@ PI_SSE_ANY
Any SSE instructions.
Definition: base/Processor.h:38
@ PI_AVX_512
AVX 512 instructions.
Definition: base/Processor.h:45
@ PI_AVX_2
AVX2 instructions.
Definition: base/Processor.h:43
@ PI_SSE
SSE instructions.
Definition: base/Processor.h:26
@ PI_GROUP_AVX_2_SSE_4_1
All AVX instructions between (including) AVX and AVX2 and SSE instructions between (including) SSE an...
Definition: base/Processor.h:68
@ PI_GROUP_SSE_2
All SSE instructions between (including) SSE and SSE2.
Definition: base/Processor.h:58
@ PI_GROUP_NEON
All NEON instructions (which is currently NEON only).
Definition: base/Processor.h:66
@ PI_AES
AES instructions.
Definition: base/Processor.h:55
@ PI_SSSE_3
SSSE3 instructions.
Definition: base/Processor.h:32
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15