Ocean
Loading...
Searching...
No Matches
base/Processor.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_BASE_PROCESSOR_H
9#define META_OCEAN_BASE_PROCESSOR_H
10
11#include "ocean/base/Base.h"
13
14namespace Ocean
15{
16
17/**
18 * Definition of individual processor instruction types.
19 * @ingroup base
20 */
21enum ProcessorInstructions : uint32_t
22{
23 /// Unknown processor instruction set.
24 PI_NONE = 0u,
25 /// SSE instructions.
26 PI_SSE = 1u << 0u,
27 /// SSE2 instructions.
28 PI_SSE_2 = 1u << 1u,
29 /// SSE3 instructions.
30 PI_SSE_3 = 1u << 2u,
31 /// SSSE3 instructions.
32 PI_SSSE_3 = 1u << 3u,
33 /// SSE_4.1 instructions.
34 PI_SSE_4_1 = 1u << 4u,
35 /// SSE 4.2 instructions.
36 PI_SSE_4_2 = 1u << 5u,
37 /// Any SSE instructions.
39
40 /// AVX instructions.
41 PI_AVX = 1u << 6u,
42 /// AVX2 instructions.
43 PI_AVX_2 = 1u << 7u,
44 /// AVX 512 instructions.
45 PI_AVX_512 = 1u << 8u,
46 /// Any AVX instructions.
48
49 /// NEON instructions.
50 PI_NEON = 1 << 9u,
51 /// Any NEON instructions.
53
54 /// AES instructions.
55 PI_AES = 1 << 10u,
56
57 /// All SSE instructions between (including) SSE and SSE2.
59 /// All SSE instructions between (including) SSE and SSE4.1.
61 /// All AVX instructions between (including) AVX and AVX2.
63 /// All AVX instructions between (including) AVX and AVX2 and SSE instructions between (including) SSE and SSE2, e.g., for processors supporting SSSE3 but not SSE3.
65 /// All NEON instructions (which is currently NEON only).
67 /// All AVX instructions between (including) AVX and AVX2 and SSE instructions between (including) SSE and SSE4.1.
69};
70
71/**
72 * This helper class provides a compile-time boolean stating whether a set of available instructions contains (includes) a specified (minimal requirement) instruction.
73 * See this tutorial:
74 * @code
75 * template <ProcessorInstructions tHighestInstructions>
76 * void function()
77 * {
78 * static_assert((ProcessorInstructionChecker<tHighestInstructions, PI_SSE_2>::value), "This function needs at least SSE2 instructions");
79 *
80 * if (ProcessorInstructionChecker<tHighestInstructions, PI_SSE_4_1>::value)
81 * {
82 * // place code needing (at most) SSE4.1 instructions here
83 * }
84 * else if (ProcessorInstructionChecker<tHighestInstructions, PI_SSE_2>::value)
85 * {
86 * // place an alternative code using (at most) SSE2 instructions here
87 * }
88 * }
89 * @endcode
90 * @ingroup base
91 * @tparam tHighestInstructions The set of available instructions, may be any combination of instructions
92 * @tparam tNecessaryInstruction The instruction that is required for a specific function (the minimal requirement), must be one specific instruction (not a set of several instructions)
93 */
94template <ProcessorInstructions tHighestInstructions, ProcessorInstructions tNecessaryInstruction>
96{
97 public:
98
99 /**
100 * True, if the requested instruction is part of the provided set of instructions.
101 * Here we disable the definition to ensure that the specialized template classes are used, as otherwise the parameter 'tNecessaryInstruction' covers a combination of several instructions.
102 */
103 // const static bool value = true;
104};
105
106/**
107 * This class implements basic functions relating to the system processor.
108 * @ingroup base
109 */
110class OCEAN_BASE_EXPORT Processor : public Singleton<Processor>
111{
112 friend class Singleton<Processor>;
113
114 public:
115
116 /**
117 * Returns the number of available processor cores.
118 * If an explicit number of processors has been forced by the user, the user defined number will be returned.
119 * @return Number of processor cores
120 * @see realCores(), forceCores().
121 */
122 inline unsigned int cores() const;
123
124 /**
125 * Returns the supported instruction set of the processor.
126 * If an explicit instruction set has been forced by the user, the user defined instruction set will be returned.
127 * @return Instruction set of the processor
128 * @see realInstructions(), forceInstructions().
129 */
130 inline ProcessorInstructions instructions();
131
132 /**
133 * Forces a user defined number of processor cores.
134 * The forced number will be returned instead of the real cores using the cores() function.
135 * @param cores Number of cores to be forced, 0 to remove the previously forced core number
136 * @return True, if succeeded
137 * @see cores().
138 */
139 bool forceCores(const unsigned int cores);
140
141 /**
142 * Forces a user-defined processor instruction set.
143 * The forced instruction set will be returned instead of the real instruction set using the instructions() function.
144 * @param instructions The instruction set to be forced, -1 to remove the previously forced instruction set
145 * @return True, if succeeded
146 * @see instructions().
147 */
148 bool forceInstructions(const ProcessorInstructions instructions);
149
150 /**
151 * Returns the processor's brand.
152 * @return The processor's brand
153 */
154 static std::string brand();
155
156 /**
157 * Returns the number of available processor cores currently detectable.
158 * @return Number of processor cores
159 * @see cores().
160 */
161 static unsigned int realCores();
162
163 /**
164 * Returns the supported instruction set of the processor.
165 * @return The supported set of instructions
166 * @see instructions().
167 */
169
170 /**
171 * Translates a set of processor instructions to a readable string.
172 * @param instructions The instructions to be translated
173 * @return The resulting string containing the instruction names, 'No SIMD Instructions' if no instruction is specified
174 */
175 static std::string translateInstructions(const ProcessorInstructions instructions);
176
177 /**
178 * Returns the best group of instructions value for a set of given processor instructions.
179 * The function may return the following groups in the following order: PI_GROUP_AVX_2_SSE_4_1, PI_GROUP_SSE_4_1, PI_GROUP_AVX_2_SSE_2, PI_GROUP_AVX_2, PI_GROUP_SSE_2, PI_GROUP_NEON.
180 * @param instructions The set of instructions for which the best group will be returned
181 * @return The best group of instructions, PI_NONE if no group is matching
182 * @tparam tIndependentOfBinary True, to return the best group without checking the binary's capabilities; False, to return only groups which are supported by the current binary
183 */
184 template <bool tIndependentOfBinary>
185 static inline ProcessorInstructions bestInstructionGroup(const ProcessorInstructions instructions);
186
187 /**
188 * Returns whether the processor/system is using the little endian convention (like e.g., x86) or whether the big endian convention is used.
189 * @return True, if the little endian convention is used
190 */
191 static inline bool isLittleEndian();
192
193 private:
194
195 /**
196 * Constructs a new processor object.
197 */
199
200#if defined(__APPLE__)
201
202 /**
203 * Returns the number of available processor cores currently detectable.
204 * @return Number of processor cores
205 * @see realCores().
206 */
207 static unsigned int realCoresApple();
208
209 #if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE==1
210
211 /**
212 * Returns the device name of the Apple iOS device.
213 * @return The device name
214 */
215 static std::string deviceModelAppleIOS();
216
217 #endif // TARGET_OS_IPHONE==1
218
219#endif // __APPLE__
220
221 /**
222 * Returns invalid processor instructions.
223 * @return Invalid instructions
224 */
225 static constexpr ProcessorInstructions invalidProcessorInstructions();
226
227 private:
228
229 /// Explicitly forced number of processor cores.
230 unsigned int forcedCores_ = 0u;
231
232 /// Explicitly forced CPU instructions.
233 ProcessorInstructions forcedProcessorInstructions_ = invalidProcessorInstructions();
234
235 /// The real instructions of the processor.
236 ProcessorInstructions processorInstructions_ = invalidProcessorInstructions();
237
238 /// The lock of the processor class.
239 mutable Lock lock_;
240};
241
/// Returns the user-forced core count if one is set (forcedCores_ > 0), otherwise the value reported by realCores().
242inline unsigned int Processor::cores() const
243{
244 const ScopedLock scopedLock(lock_); // lock_ stays held until this function returns, covering the read of forcedCores_
245
246 if (forcedCores_ > 0u) // zero is the member's default and means that no core count has been forced
247 {
248 return forcedCores_;
249 }
250
251 return realCores(); // nothing forced: fall back to the currently detectable number of cores
252}
253
266
267/**
268 * Specialization of ProcessorInstructionChecker for the necessary instruction PI_SSE.
269 * @see ProcessorInstructionChecker.
270 * @ingroup base
271 */
272template <ProcessorInstructions tHighestInstructions>
273class ProcessorInstructionChecker<tHighestInstructions, PI_SSE>
274{
275 public:
276
277 /// True, if the part of 'tHighestInstructions' selected by the PI_SSE_ANY mask is numerically greater than or equal to PI_SSE, i.e., the set holds PI_SSE or a higher-valued flag covered by that mask.
278 constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE;
279};
280
281/**
282 * Specialization of ProcessorInstructionChecker for the necessary instruction PI_SSE_2.
283 * @see ProcessorInstructionChecker.
284 * @ingroup base
285 */
286template <ProcessorInstructions tHighestInstructions>
287class ProcessorInstructionChecker<tHighestInstructions, PI_SSE_2>
288{
289 public:
290
291 /// True, if the part of 'tHighestInstructions' selected by the PI_SSE_ANY mask is numerically greater than or equal to PI_SSE_2, i.e., the set holds PI_SSE_2 or a higher-valued flag covered by that mask.
292 constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE_2;
293};
294
295/**
296 * Specialization of ProcessorInstructionChecker for the necessary instruction PI_SSE_3.
297 * @see ProcessorInstructionChecker.
298 * @ingroup base
299 */
300template <ProcessorInstructions tHighestInstructions>
301class ProcessorInstructionChecker<tHighestInstructions, PI_SSE_3>
302{
303 public:
304
305 /// True, if the part of 'tHighestInstructions' selected by the PI_SSE_ANY mask is numerically greater than or equal to PI_SSE_3, i.e., the set holds PI_SSE_3 or a higher-valued flag covered by that mask.
306 constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE_3;
307};
308
309/**
310 * Specialization of ProcessorInstructionChecker for the necessary instruction PI_SSSE_3.
311 * @see ProcessorInstructionChecker.
312 * @ingroup base
313 */
314template <ProcessorInstructions tHighestInstructions>
315class ProcessorInstructionChecker<tHighestInstructions, PI_SSSE_3>
316{
317 public:
318
319 /// True, if the part of 'tHighestInstructions' selected by the PI_SSE_ANY mask is numerically greater than or equal to PI_SSSE_3, i.e., the set holds PI_SSSE_3 or a higher-valued flag covered by that mask.
320 constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSSE_3;
321};
322
323/**
324 * Specialization of ProcessorInstructionChecker for the necessary instruction PI_SSE_4_1.
325 * @see ProcessorInstructionChecker.
326 * @ingroup base
327 */
328template <ProcessorInstructions tHighestInstructions>
329class ProcessorInstructionChecker<tHighestInstructions, PI_SSE_4_1>
330{
331 public:
332
333 /// True, if the part of 'tHighestInstructions' selected by the PI_SSE_ANY mask is numerically greater than or equal to PI_SSE_4_1, i.e., the set holds PI_SSE_4_1 or a higher-valued flag covered by that mask.
334 constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE_4_1;
335};
336
337/**
338 * Specialization of ProcessorInstructionChecker for the necessary instruction PI_SSE_4_2.
339 * @see ProcessorInstructionChecker.
340 * @ingroup base
341 */
342template <ProcessorInstructions tHighestInstructions>
343class ProcessorInstructionChecker<tHighestInstructions, PI_SSE_4_2>
344{
345 public:
346
347 /// True, if the part of 'tHighestInstructions' selected by the PI_SSE_ANY mask is numerically greater than or equal to PI_SSE_4_2, i.e., the set holds PI_SSE_4_2 or a higher-valued flag covered by that mask.
348 constexpr static bool value = (tHighestInstructions & PI_SSE_ANY) >= PI_SSE_4_2;
349};
350
351/**
352 * Specialization of ProcessorInstructionChecker for the necessary instruction PI_AVX.
353 * @see ProcessorInstructionChecker.
354 * @ingroup base
355 */
356template <ProcessorInstructions tHighestInstructions>
357class ProcessorInstructionChecker<tHighestInstructions, PI_AVX>
358{
359 public:
360
361 /// True, if the part of 'tHighestInstructions' selected by the PI_AVX_ANY mask is numerically greater than or equal to PI_AVX, i.e., the set holds PI_AVX or a higher-valued flag covered by that mask.
362 constexpr static bool value = (tHighestInstructions & PI_AVX_ANY) >= PI_AVX;
363};
364
365/**
366 * Specialization of ProcessorInstructionChecker for the necessary instruction PI_AVX_2.
367 * @see ProcessorInstructionChecker.
368 * @ingroup base
369 */
370template <ProcessorInstructions tHighestInstructions>
371class ProcessorInstructionChecker<tHighestInstructions, PI_AVX_2>
372{
373 public:
374
375 /// True, if the part of 'tHighestInstructions' selected by the PI_AVX_ANY mask is numerically greater than or equal to PI_AVX_2, i.e., the set holds PI_AVX_2 or a higher-valued flag covered by that mask.
376 constexpr static bool value = (tHighestInstructions & PI_AVX_ANY) >= PI_AVX_2;
377};
378
379/**
380 * Specialization of ProcessorInstructionChecker for the necessary instruction PI_AVX_512.
381 * @see ProcessorInstructionChecker.
382 * @ingroup base
383 */
384template <ProcessorInstructions tHighestInstructions>
385class ProcessorInstructionChecker<tHighestInstructions, PI_AVX_512>
386{
387 public:
388
389 /// True, if the part of 'tHighestInstructions' selected by the PI_AVX_ANY mask is numerically greater than or equal to PI_AVX_512, i.e., the set holds PI_AVX_512 or a higher-valued flag covered by that mask.
390 constexpr static bool value = (tHighestInstructions & PI_AVX_ANY) >= PI_AVX_512;
391};
392
393/**
394 * Specialization of ProcessorInstructionChecker for the necessary instruction PI_NEON.
395 * @see ProcessorInstructionChecker.
396 * @ingroup base
397 */
398template <ProcessorInstructions tHighestInstructions>
399class ProcessorInstructionChecker<tHighestInstructions, PI_NEON>
400{
401 public:
402
403 /// True, if the part of 'tHighestInstructions' selected by the PI_NEON_ANY mask is numerically greater than or equal to PI_NEON, i.e., the set holds PI_NEON or a higher-valued flag covered by that mask.
404 constexpr static bool value = (tHighestInstructions & PI_NEON_ANY) >= PI_NEON;
405};
406
407template <bool tIndependentOfBinary>
409{
411 {
413 }
414
416 {
417 return PI_GROUP_SSE_4_1;
418 }
419
421 {
423 }
424
426 {
427 return PI_GROUP_AVX_2;
428 }
429
431 {
432 return PI_GROUP_SSE_2;
433 }
434
436 {
437 return PI_GROUP_NEON;
438 }
439
440 return PI_NONE;
441}
442
443template <>
444inline ProcessorInstructions Processor::bestInstructionGroup<false>(const ProcessorInstructions instructions)
445{
446#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41 && defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 20
448 {
450 }
451#endif
452
453#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
455 {
456 return PI_GROUP_SSE_4_1;
457 }
458#endif
459
460#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 20 && defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 20
462 {
464 }
465#endif
466
467#if defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 20
469 {
470 return PI_GROUP_AVX_2;
471 }
472#endif
473
474#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 20
476 {
477 return PI_GROUP_SSE_2;
478 }
479#endif
480
481#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
483 {
484 return PI_GROUP_NEON;
485 }
486#endif
487
488 OCEAN_SUPPRESS_UNUSED_WARNING(instructions);
489
490 return PI_NONE;
491}
492
494{
495 const int32_t littleEndianValue = 1;
496
497 const bool result = (*(int8_t*)(&littleEndianValue)) == int8_t(1);
498
499#ifdef OCEAN_LITTLE_ENDIAN
500 ocean_assert(result);
501#else
502 ocean_assert(!result);
503#endif
504
505 return result;
506}
507
512
513}
514
515#endif // META_OCEAN_BASE_PROCESSOR_H
This class implements a recursive lock object.
Definition Lock.h:31
This class implements basic functions relating the system processor.
Definition base/Processor.h:111
Lock lock_
The lock of the processor class.
Definition base/Processor.h:239
bool forceCores(const unsigned int cores)
Forces a user defined number of processor cores.
ProcessorInstructions instructions()
Returns the supported instruction set of the processor.
Definition base/Processor.h:254
ProcessorInstructions forcedProcessorInstructions_
Explicitly forced CPU instructions.
Definition base/Processor.h:233
static std::string deviceModelAppleIOS()
Returns the device name of the Apple iOS device.
static unsigned int realCores()
Returns the number of available processor cores currently detectable.
static ProcessorInstructions bestInstructionGroup(const ProcessorInstructions instructions)
Returns the best group of instructions value for a set of given processor instructions.
Definition base/Processor.h:408
ProcessorInstructions processorInstructions_
The real instructions of the processor.
Definition base/Processor.h:236
static ProcessorInstructions realInstructions()
Returns the supported instruction set of the processor.
bool forceInstructions(const ProcessorInstructions instructions)
Forces a user-defined processor instruction set.
static std::string brand()
Returns the processor's brand.
static constexpr ProcessorInstructions invalidProcessorInstructions()
Returns invalid processor instructions.
Definition base/Processor.h:508
static std::string translateInstructions(const ProcessorInstructions instructions)
Translates a set of processor instructions to a readable string.
static bool isLittleEndian()
Returns whether the processor/system is using the little endian convention (like e....
Definition base/Processor.h:493
unsigned int forcedCores_
Explicitly forced number of processor cores.
Definition base/Processor.h:230
static unsigned int realCoresApple()
Returns the number of available processor cores currently detectable.
unsigned int cores() const
Returns the number of available processor cores.
Definition base/Processor.h:242
Processor()
Constructs a new processor object.
This helper class allows to determine a compile-time known boolean statement whether a set of availab...
Definition base/Processor.h:96
This class implements a scoped lock object for recursive lock objects.
Definition Lock.h:135
This template class is the base class for all singleton objects.
Definition Singleton.h:71
ProcessorInstructions
Definition of individual processor instruction types.
Definition base/Processor.h:22
@ PI_AVX
AVX instructions.
Definition base/Processor.h:41
@ PI_SSE_3
SSE3 instructions.
Definition base/Processor.h:30
@ PI_NONE
Unknown processor instruction set.
Definition base/Processor.h:24
@ PI_GROUP_AVX_2_SSE_2
All AVX instructions between (including) AVX and AVX2 and SSE instructions between (including) SSE an...
Definition base/Processor.h:64
@ PI_GROUP_AVX_2
All AVX instructions between (including) AVX and AVX2.
Definition base/Processor.h:62
@ PI_SSE_4_1
SSE_4.1 instructions.
Definition base/Processor.h:34
@ PI_GROUP_SSE_4_1
All SSE instructions between (including) SSE and SSE4.1.
Definition base/Processor.h:60
@ PI_NEON_ANY
Any NEON instructions.
Definition base/Processor.h:52
@ PI_SSE_4_2
SSE 4.2 instructions.
Definition base/Processor.h:36
@ PI_SSE_2
SSE2 instructions.
Definition base/Processor.h:28
@ PI_AVX_ANY
Any AVX instructions.
Definition base/Processor.h:47
@ PI_NEON
NEON instructions.
Definition base/Processor.h:50
@ PI_SSE_ANY
Any SSE instructions.
Definition base/Processor.h:38
@ PI_AVX_512
AVX 512 instructions.
Definition base/Processor.h:45
@ PI_AVX_2
AVX2 instructions.
Definition base/Processor.h:43
@ PI_SSE
SSE instructions.
Definition base/Processor.h:26
@ PI_GROUP_AVX_2_SSE_4_1
All AVX instructions between (including) AVX and AVX2 and SSE instructions between (including) SSE an...
Definition base/Processor.h:68
@ PI_GROUP_SSE_2
All SSE instructions between (including) SSE and SSE2.
Definition base/Processor.h:58
@ PI_GROUP_NEON
All NEON instructions (which is currently NEON only).
Definition base/Processor.h:66
@ PI_AES
AES instructions.
Definition base/Processor.h:55
@ PI_SSSE_3
SSSE3 instructions.
Definition base/Processor.h:32
The namespace covering the entire Ocean framework.
Definition Accessor.h:15