Tensor Comprehensions
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
genetic_tuning_harness.h
Go to the documentation of this file.
1 
16 #pragma once
17 
18 #include <atomic>
19 #include <csignal>
20 #include <deque>
21 #include <memory>
22 #include <unordered_map>
23 #include <vector>
24 
25 #include "tc/aten/aten_compiler.h"
29 #include "tc/lang/parser.h"
30 
31 namespace tc {
32 namespace autotune {
33 namespace detail {
34 
35 extern volatile std::sig_atomic_t signal_;
36 extern volatile std::sig_atomic_t killRequested_;
37 
38 class GeneticTunerHarness {
39  public:
40  GeneticTunerHarness(
41  size_t n,
42  uint8_t crossoverRate,
43  uint8_t mutationRate,
44  size_t numberElites,
45  lang::TreeRef tc,
46  std::string kernelName,
47  const std::unordered_map<size_t, std::vector<const DLTensor*>>& inputs,
48  std::unordered_map<size_t, std::vector<DLTensor*>>& outputs,
49  MappingOptions baseMapping,
50  std::vector<MappingOptions> startingPoints,
51  const TuningParameterFixer& fixedParams);
52  void run(size_t numGenerations);
53 
54  private:
55  void setupTuningParameters();
56 
59  void runOneGeneration(size_t generation);
60 
62  bool warmupOrPrune(
63  tc::ExecutionEngine& executionEngine,
64  const std::vector<DLTensor*>& outputs,
65  const std::vector<const DLTensor*>& inputs,
66  size_t handle,
67  size_t bestTimeSoFar);
68 
70  void doCompile(tc::ExecutionEngine& engine);
72  void doGpuWork(size_t gpu, tc::ExecutionEngine& engine, Printer& printer);
73 
77  MappingOptions bestMappingOption() {
78  std::lock_guard<std::mutex> lock(bestTimeMtx_);
79  return bestMappingOptions_;
80  }
81 
82  public:
83  static constexpr int kReducedWarmupIterations = 2;
84  static constexpr int kReducedBenchmarkIterations = 10;
85  static constexpr int kEarlyPruneFactor = 5;
86 
87  const size_t kMaxPopulationSize;
88  const uint8_t kCrossOverRate;
89  const uint8_t kMutationRate;
90  const size_t kNumberElites;
91 
92  TuningConfiguration configuration;
93 
94  private:
95  std::mutex bestTimeMtx_;
96  size_t bestTime_ = std::numeric_limits<size_t>::max();
97  MappingOptions bestMappingOptions_;
98 
99  const lang::TreeRef kTc_;
100  const std::string kKernelName_;
101  std::unique_ptr<GeneticSearch> tuner_;
102  std::atomic_size_t currentCompilationJob_;
103  std::deque<std::atomic_bool> readyToEvaluate_;
104  std::atomic_size_t numEvaluations_;
105  const std::unordered_map<size_t, std::vector<const DLTensor*>> kInputs_;
106  std::unordered_map<size_t, std::vector<DLTensor*>> outputs_;
107  const MappingOptions kBaseMapping_;
108  const std::vector<MappingOptions> kStartingPoints_;
109 };
110 
111 std::vector<size_t> parseGpus();
112 
113 } // namespace detail
114 } // namespace autotune
115 } // namespace tc
const size_t kMaxPopulationSize
Definition: genetic_tuning_harness.h:87
Definition: printer.h:33
const std::vector< MappingOptions > kStartingPoints_
Definition: genetic_tuning_harness.h:108
Definition: execution_engine.h:34
std::unique_ptr< GeneticSearch > tuner_
Definition: genetic_tuning_harness.h:101
const lang::TreeRef kTc_
Definition: genetic_tuning_harness.h:99
Definition: parameters.h:149
bool warmupOrPrune(tc::ExecutionEngine &executionEngine, const std::vector< DLTensor * > &outputs, const std::vector< const DLTensor * > &inputs, size_t handle, size_t bestTimeSoFar)
Helper function to get a kernel into benchmark-able state.
const std::unordered_map< size_t, std::vector< const DLTensor * > > kInputs_
Definition: genetic_tuning_harness.h:105
void run(size_t numGenerations)
MappingOptions bestMappingOption()
Definition: genetic_tuning_harness.h:77
TuningConfiguration configuration
Definition: genetic_tuning_harness.h:92
static constexpr int kReducedBenchmarkIterations
Definition: genetic_tuning_harness.h:84
std::mutex bestTimeMtx_
Definition: genetic_tuning_harness.h:95
std::atomic_size_t currentCompilationJob_
Definition: genetic_tuning_harness.h:102
std::deque< std::atomic_bool > readyToEvaluate_
Definition: genetic_tuning_harness.h:103
const uint8_t kMutationRate
Definition: genetic_tuning_harness.h:89
Definition: genetic_tuning_harness.h:38
size_t bestTime_
Definition: genetic_tuning_harness.h:96
TuningConfiguration makeTuningConfiguration(const MappingOptions &options)
Definition: mapping_options.h:336
void runOneGeneration(size_t generation)
volatile std::sig_atomic_t killRequested_
static constexpr int kReducedWarmupIterations
Definition: genetic_tuning_harness.h:83
const std::string kKernelName_
Definition: genetic_tuning_harness.h:100
const MappingOptions kBaseMapping_
Definition: genetic_tuning_harness.h:107
const size_t kNumberElites
Definition: genetic_tuning_harness.h:90
void doCompile(tc::ExecutionEngine &engine)
Helper function to delegate compiling on the cpu to different threads.
Definition: parameters.h:188
void doGpuWork(size_t gpu, tc::ExecutionEngine &engine, Printer &printer)
Helper function to delegate running on the gpu to different threads.
std::unordered_map< size_t, std::vector< DLTensor * > > outputs_
Definition: genetic_tuning_harness.h:106
MappingOptions bestMappingOptions_
Definition: genetic_tuning_harness.h:97
GeneticTunerHarness(size_t n, uint8_t crossoverRate, uint8_t mutationRate, size_t numberElites, lang::TreeRef tc, std::string kernelName, const std::unordered_map< size_t, std::vector< const DLTensor * >> &inputs, std::unordered_map< size_t, std::vector< DLTensor * >> &outputs, MappingOptions baseMapping, std::vector< MappingOptions > startingPoints, const TuningParameterFixer &fixedParams)
std::atomic_size_t numEvaluations_
Definition: genetic_tuning_harness.h:104
const uint8_t kCrossOverRate
Definition: genetic_tuning_harness.h:88
std::shared_ptr< Tree > TreeRef
Definition: tree.h:44
Definition: parameters.h:225
volatile std::sig_atomic_t signal_
static constexpr int kEarlyPruneFactor
Definition: genetic_tuning_harness.h:85
std::vector< size_t > parseGpus()
tc::MappingOptions makeOptions(const CandidateConfiguration &conf)
Builds a tc::MappingOptions from the candidate configuration conf.