#include <genetic_tuning_harness.h>
|
| GeneticTunerHarness (size_t n, uint8_t crossoverRate, uint8_t mutationRate, size_t numberElites, lang::TreeRef tc, std::string kernelName, const std::unordered_map< size_t, std::vector< const DLTensor * >> &inputs, std::unordered_map< size_t, std::vector< DLTensor * >> &outputs, MappingOptions baseMapping, std::vector< MappingOptions > startingPoints, const TuningParameterFixer &fixedParams) |
|
void | run (size_t numGenerations) |
|
tc::autotune::detail::GeneticTunerHarness::GeneticTunerHarness |
( |
size_t |
n, |
|
|
uint8_t |
crossoverRate, |
|
|
uint8_t |
mutationRate, |
|
|
size_t |
numberElites, |
|
|
lang::TreeRef |
tc, |
|
|
std::string |
kernelName, |
|
|
const std::unordered_map< size_t, std::vector< const DLTensor * >> & |
inputs, |
|
|
std::unordered_map< size_t, std::vector< DLTensor * >> & |
outputs, |
|
|
MappingOptions |
baseMapping, |
|
|
std::vector< MappingOptions > |
startingPoints, |
|
|
const TuningParameterFixer & |
fixedParams |
|
) |
| |
MappingOptions tc::autotune::detail::GeneticTunerHarness::bestMappingOption |
( |
| ) |
|
|
inline, private
Helper function to delegate compiling on the CPU to different threads.
Helper function to delegate running on the GPU to different threads.
void tc::autotune::detail::GeneticTunerHarness::run |
( |
size_t |
numGenerations | ) |
|
void tc::autotune::detail::GeneticTunerHarness::runOneGeneration |
( |
size_t |
generation | ) |
|
|
private |
Traverse one generation of candidates in parallel and evaluate their runtimes.
void tc::autotune::detail::GeneticTunerHarness::setupTuningParameters |
( |
| ) |
|
|
private |
bool tc::autotune::detail::GeneticTunerHarness::warmupOrPrune |
( |
tc::ExecutionEngine & |
executionEngine, |
|
|
const std::vector< DLTensor * > & |
outputs, |
|
|
const std::vector< const DLTensor * > & |
inputs, |
|
|
size_t |
handle, |
|
|
size_t |
bestTimeSoFar |
|
) |
| |
|
private |
Helper function to get a kernel into a benchmark-able state.
MappingOptions tc::autotune::detail::GeneticTunerHarness::bestMappingOptions_ |
|
private |
size_t tc::autotune::detail::GeneticTunerHarness::bestTime_ = std::numeric_limits<size_t>::max() |
|
private |
std::mutex tc::autotune::detail::GeneticTunerHarness::bestTimeMtx_ |
|
private |
std::atomic_size_t tc::autotune::detail::GeneticTunerHarness::currentCompilationJob_ |
|
private |
const MappingOptions tc::autotune::detail::GeneticTunerHarness::kBaseMapping_ |
|
private |
const uint8_t tc::autotune::detail::GeneticTunerHarness::kCrossOverRate |
constexpr int tc::autotune::detail::GeneticTunerHarness::kEarlyPruneFactor = 5 |
|
static |
const std::unordered_map<size_t, std::vector<const DLTensor*> > tc::autotune::detail::GeneticTunerHarness::kInputs_ |
|
private |
const std::string tc::autotune::detail::GeneticTunerHarness::kKernelName_ |
|
private |
const size_t tc::autotune::detail::GeneticTunerHarness::kMaxPopulationSize |
const uint8_t tc::autotune::detail::GeneticTunerHarness::kMutationRate |
const size_t tc::autotune::detail::GeneticTunerHarness::kNumberElites |
constexpr int tc::autotune::detail::GeneticTunerHarness::kReducedBenchmarkIterations = 10 |
|
static |
constexpr int tc::autotune::detail::GeneticTunerHarness::kReducedWarmupIterations = 2 |
|
static |
const std::vector<MappingOptions> tc::autotune::detail::GeneticTunerHarness::kStartingPoints_ |
|
private |
const lang::TreeRef tc::autotune::detail::GeneticTunerHarness::kTc_ |
|
private |
std::atomic_size_t tc::autotune::detail::GeneticTunerHarness::numEvaluations_ |
|
private |
std::unordered_map<size_t, std::vector<DLTensor*> > tc::autotune::detail::GeneticTunerHarness::outputs_ |
|
private |
std::deque<std::atomic_bool> tc::autotune::detail::GeneticTunerHarness::readyToEvaluate_ |
|
private |
std::unique_ptr<GeneticSearch> tc::autotune::detail::GeneticTunerHarness::tuner_ |
|
private |
The documentation for this class was generated from the following file: