#include <genetic_tuning_harness.h>
|
| | GeneticTunerHarness (size_t n, uint8_t crossoverRate, uint8_t mutationRate, size_t numberElites, lang::TreeRef tc, std::string kernelName, const std::unordered_map< size_t, std::vector< const DLTensor * >> &inputs, std::unordered_map< size_t, std::vector< DLTensor * >> &outputs, MappingOptions baseMapping, std::vector< MappingOptions > startingPoints, const TuningParameterFixer &fixedParams) |
| |
| void | run (size_t numGenerations) |
| |
| tc::autotune::detail::GeneticTunerHarness::GeneticTunerHarness |
( |
size_t |
n, |
|
|
uint8_t |
crossoverRate, |
|
|
uint8_t |
mutationRate, |
|
|
size_t |
numberElites, |
|
|
lang::TreeRef |
tc, |
|
|
std::string |
kernelName, |
|
|
const std::unordered_map< size_t, std::vector< const DLTensor * >> & |
inputs, |
|
|
std::unordered_map< size_t, std::vector< DLTensor * >> & |
outputs, |
|
|
MappingOptions |
baseMapping, |
|
|
std::vector< MappingOptions > |
startingPoints, |
|
|
const TuningParameterFixer & |
fixedParams |
|
) |
| |
| MappingOptions tc::autotune::detail::GeneticTunerHarness::bestMappingOption |
( |
| ) |
|
|
inline private
Helper function to delegate compiling on the CPU to different threads.
Helper function to delegate running on the GPU to different threads.
| void tc::autotune::detail::GeneticTunerHarness::run |
( |
size_t |
numGenerations | ) |
|
| void tc::autotune::detail::GeneticTunerHarness::runOneGeneration |
( |
size_t |
generation | ) |
|
|
private |
Traverse one generation of candidates in parallel and evaluate their runtimes.
| void tc::autotune::detail::GeneticTunerHarness::setupTuningParameters |
( |
| ) |
|
|
private |
| bool tc::autotune::detail::GeneticTunerHarness::warmupOrPrune |
( |
tc::ExecutionEngine & |
executionEngine, |
|
|
const std::vector< DLTensor * > & |
outputs, |
|
|
const std::vector< const DLTensor * > & |
inputs, |
|
|
size_t |
handle, |
|
|
size_t |
bestTimeSoFar |
|
) |
| |
|
private |
Helper function to get a kernel into benchmark-able state.
| MappingOptions tc::autotune::detail::GeneticTunerHarness::bestMappingOptions_ |
|
private |
| size_t tc::autotune::detail::GeneticTunerHarness::bestTime_ = std::numeric_limits<size_t>::max() |
|
private |
| std::mutex tc::autotune::detail::GeneticTunerHarness::bestTimeMtx_ |
|
private |
| std::atomic_size_t tc::autotune::detail::GeneticTunerHarness::currentCompilationJob_ |
|
private |
| const MappingOptions tc::autotune::detail::GeneticTunerHarness::kBaseMapping_ |
|
private |
| const uint8_t tc::autotune::detail::GeneticTunerHarness::kCrossOverRate |
| constexpr int tc::autotune::detail::GeneticTunerHarness::kEarlyPruneFactor = 5 |
|
static |
| const std::unordered_map<size_t, std::vector<const DLTensor*> > tc::autotune::detail::GeneticTunerHarness::kInputs_ |
|
private |
| const std::string tc::autotune::detail::GeneticTunerHarness::kKernelName_ |
|
private |
| const size_t tc::autotune::detail::GeneticTunerHarness::kMaxPopulationSize |
| const uint8_t tc::autotune::detail::GeneticTunerHarness::kMutationRate |
| const size_t tc::autotune::detail::GeneticTunerHarness::kNumberElites |
| constexpr int tc::autotune::detail::GeneticTunerHarness::kReducedBenchmarkIterations = 10 |
|
static |
| constexpr int tc::autotune::detail::GeneticTunerHarness::kReducedWarmupIterations = 2 |
|
static |
| const std::vector<MappingOptions> tc::autotune::detail::GeneticTunerHarness::kStartingPoints_ |
|
private |
| const lang::TreeRef tc::autotune::detail::GeneticTunerHarness::kTc_ |
|
private |
| std::atomic_size_t tc::autotune::detail::GeneticTunerHarness::numEvaluations_ |
|
private |
| std::unordered_map<size_t, std::vector<DLTensor*> > tc::autotune::detail::GeneticTunerHarness::outputs_ |
|
private |
| std::deque<std::atomic_bool> tc::autotune::detail::GeneticTunerHarness::readyToEvaluate_ |
|
private |
| std::unique_ptr<GeneticSearch> tc::autotune::detail::GeneticTunerHarness::tuner_ |
|
private |
The documentation for this class was generated from the following file: