25 #include <dlpack/dlpack.h>
27 #include <compcache.pb.h>
57 template <
typename CC>
64 template <
typename Protobuf>
66 static std::shared_ptr<CC>
getCache();
84 : invalid_argument(what_arg) {}
86 : invalid_argument(what_arg) {}
125 const std::string&
id,
126 const std::string& kernelSpecializedName,
127 const std::vector<int>& kernelParameters,
131 const std::vector<const DLTensor*>& inputs,
132 const std::vector<const DLTensor*>& outputs,
133 const std::string& cudaSource,
134 const std::string& deviceStr);
170 const std::string&
id,
172 const std::vector<detail::TensorInfo>& inputs,
173 const std::vector<detail::TensorInfo>& outputs);
175 const std::string&
id,
177 const std::vector<const DLTensor*>& inputs,
178 const std::vector<const DLTensor*>& outputs);
180 const std::string&
id,
182 const std::vector<const DLTensor*>& inputs,
183 const std::vector<const DLTensor*>& outputs)
const;
186 template <
typename C,
typename TensorTy>
189 const std::string&
id,
191 const std::vector<TensorTy>& inputs,
192 const std::vector<TensorTy>& outputs)
193 -> decltype(c.searchKernel(
id, options, inputs, outputs));
206 const std::string&
id,
208 const std::vector<const DLTensor*>& inputs,
209 const std::vector<const DLTensor*>& outputs,
210 const std::string& kernelSpecializedName,
211 const std::vector<int>& kernelParameters,
212 const std::string& cudaSource,
221 const std::string&
id,
223 const std::vector<const DLTensor*>& inputs,
224 const std::vector<const DLTensor*>& outputs)
const;
249 const std::string&
id,
250 const std::vector<const DLTensor*>& inputs,
251 const std::vector<const DLTensor*>& outputs,
252 const std::string& deviceStr,
259 Key(
const std::string&
id,
260 const std::vector<const DLTensor*>& inputs,
261 const std::vector<const DLTensor*>& outputs,
262 const std::string& deviceStr,
265 Key(
const std::string&
id,
266 std::vector<detail::TensorInfo>&& inputs,
267 std::vector<detail::TensorInfo>&& outputs,
268 const std::string& deviceStr,
269 const std::string& gitVersion);
299 const std::string&
id,
300 const std::vector<const DLTensor*>& inputs,
301 const std::vector<const DLTensor*>& outputs);
303 const std::string&
id,
304 const std::vector<const DLTensor*>& input,
305 const std::vector<const DLTensor*>& outputs)
const;
308 template <
typename C>
311 const std::string&
id,
312 const std::vector<const DLTensor*>& inputs,
313 const std::vector<const DLTensor*>& outputs)
314 -> decltype(c.searchKernel(
id, inputs, outputs));
334 const std::string&
id,
336 const std::vector<const DLTensor*>& inputs,
337 const std::vector<const DLTensor*>& outputs,
341 const std::string&
id,
342 const std::vector<const DLTensor*>& inputs,
343 const std::vector<const DLTensor*>& outputs)
const;
346 const std::string&
id,
347 const std::vector<const DLTensor*>& inputs,
348 const std::vector<const DLTensor*>& outputs)
const;
351 const std::string&
id,
352 const std::vector<const DLTensor*>& inputs,
353 const std::vector<const DLTensor*>& outputs,
385 const std::string&
id,
386 const std::string& kernelSpecializedName,
387 const std::vector<int>& kernelParameters,
390 const std::vector<const DLTensor*>& inputs,
391 const std::vector<const DLTensor*>& outputs,
392 const std::string& cudaSource,
393 const std::string& deviceStr);
426 const std::string&
id,
427 const std::vector<detail::TensorInfo>& inputs,
428 const std::vector<detail::TensorInfo>& outputs);
430 const std::string&
id,
431 const std::vector<const DLTensor*>& inputs,
432 const std::vector<const DLTensor*>& outputs);
434 const std::string&
id,
435 const std::vector<const DLTensor*>& inputs,
436 const std::vector<const DLTensor*>& outputs)
const;
439 template <
typename C,
typename InputTy>
442 const std::string&
id,
443 const std::vector<InputTy>& inputs,
444 const std::vector<InputTy>& outputs)
445 -> decltype(c.searchKernel(
id, inputs, outputs));
459 const std::string&
id,
460 const std::vector<const DLTensor*>& inputs,
461 const std::vector<const DLTensor*>& outputs,
462 const std::string& kernelSpecializedName,
463 const std::vector<int>& kernelParameters,
464 const std::string& cudaSource,
473 const std::string&
id,
474 const std::vector<const DLTensor*>& inputs,
475 const std::vector<const DLTensor*>& outputs)
const;
483 const std::vector<const DLTensor*>& inputsTensor,
484 const std::vector<detail::TensorInfo>& inputsInfo);
std::vector< detail::TensorInfo > inputs
Definition: compilation_cache.h:400
std::string makeOptionsFilename(const std::string &filename)
CudaCacheProto Protobuf
Definition: compilation_cache.h:96
std::string id
Definition: compilation_cache.h:140
CachedEntry * searchKernel(const std::string &id, const std::vector< detail::TensorInfo > &inputs, const std::vector< detail::TensorInfo > &outputs)
Definition: compilation_cache.h:398
Definition: compilation_cache.h:258
ManualCudaCacheProto Protobuf
Definition: compilation_cache.h:367
MappingOptions options
Definition: compilation_cache.h:325
Definition: compilation_cache.h:100
static bool cacheEnabled()
Definition: compilation_cache-inl.h:80
Specializing CudaDim to differentiate between Block and Grid sizes.
Definition: mapping_options.h:208
void keepOnlyBestCandidates(size_t numberToKeep)
CachedEntry(const std::string &id, const std::string &kernelSpecializedName, const std::vector< int > &kernelParameters, const Grid &grid, const Block &block, const MappingOptions &mappingOptions, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &cudaSource, const std::string &deviceStr)
std::string kernelSpecializedName
Definition: compilation_cache.h:150
Definition: compilation_cache.h:406
void removeFromCudaCacheEntriesNotInOptionsCache(CudaCache &cc, const OptionsCache &oc)
Definition: compilation_cache.h:93
MappingOptions mappingOptions
Definition: compilation_cache.h:281
static void dumpCacheToProtobuf(const std::string &filename)
Definition: compilation_cache-inl.h:48
CachedEntry * searchKernel(const std::string &id, const MappingOptions &options, const std::vector< detail::TensorInfo > &inputs, const std::vector< detail::TensorInfo > &outputs)
std::string cudaSource
Definition: compilation_cache.h:149
std::string deviceStr
Definition: compilation_cache.h:402
static auto searchKernelImpl(C &c, const std::string &id, const std::vector< InputTy > &inputs, const std::vector< InputTy > &outputs) -> decltype(c.searchKernel(id, inputs, outputs))
std::vector< detail::TensorInfo > inputs
Definition: compilation_cache.h:272
Block block
Definition: compilation_cache.h:411
static auto searchKernelImpl(C &c, const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) -> decltype(c.searchKernel(id, inputs, outputs))
Definition: compilation_cache-inl.h:131
ManualCudaCacheEntryProto toProtobuf() const
void cacheKernel(const std::string &id, const MappingOptions &options, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &kernelSpecializedName, const std::vector< int > &kernelParameters, const std::string &cudaSource, const Grid &grid, const Block &block)
Definition: compilation_cache.h:81
bool operator==(const DLTensor *t) const
static void loadCacheFromProtobuf(const std::string &filename)
Definition: compilation_cache-inl.h:60
Grid grid
Definition: compilation_cache.h:104
decltype(entries_)::const_iterator end() const
OptionsCacheProto Protobuf
Definition: compilation_cache.h:231
Key key
Definition: compilation_cache.h:155
static std::shared_ptr< CudaCache > & getGlobalSharedCache()
void removeEntriesNotInOptionsCache(const OptionsCache &oc)
Block block
Definition: compilation_cache.h:105
size_t size() const
Definition: compilation_cache-inl.h:85
std::vector< detail::TensorInfo > outputs
Definition: compilation_cache.h:143
CacheEntrySameKeyDifferentValue(const std::string &what_arg)
Definition: compilation_cache.h:83
Definition: compilation_cache.h:139
Key key
Definition: compilation_cache.h:413
uint64_t alignment
Definition: compilation_cache.h:44
Grid grid
Definition: compilation_cache.h:152
static void disableCache()
Definition: compilation_cache-inl.h:34
decltype(entries_)::const_iterator begin() const
std::string specializedName
Definition: compilation_cache.h:102
Key key
Definition: compilation_cache.h:284
ManualCudaCache()=default
std::vector< Duration > recordedRuntimes
Definition: compilation_cache.h:326
std::unique_ptr< CudaCache::RetrievalResult > retrieveKernel(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) const
std::string kernelSpecializedName
Definition: compilation_cache.h:408
std::string id
Definition: compilation_cache.h:271
static void enableCache()
Definition: compilation_cache-inl.h:29
static auto searchKernelImpl(C &c, const std::string &id, const MappingOptions &options, const std::vector< TensorTy > &inputs, const std::vector< TensorTy > &outputs) -> decltype(c.searchKernel(id, options, inputs, outputs))
void cacheKernel(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &kernelSpecializedName, const std::vector< int > &kernelParameters, const std::string &cudaSource, const Grid &grid, const Block &block)
Definition: mapping_options.h:336
OptionsCacheProto toProtobuf() const
std::chrono::high_resolution_clock::duration Duration
Definition: rtc.h:31
std::vector< int > kernelParameters
Definition: compilation_cache.h:409
std::vector< RetrievalResult > retrieveOptionsAndRuntimes(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) const
std::vector< Values > values
Definition: compilation_cache.h:285
OptionsCacheEntryProto toProtobuf() const
TensorInfo(const DLTensor *t)
std::vector< CachedEntry > entries_
Definition: compilation_cache.h:418
static std::shared_ptr< ManualCudaCache > & getGlobalSharedCache()
std::string gitVersion
Definition: compilation_cache.h:403
Values values
Definition: compilation_cache.h:414
int numberCacheAttemps
Definition: compilation_cache.h:74
CudaCacheEntryProto toProtobuf() const
std::unique_ptr< RetrievalResult > retrieveKernel(const std::string &id, const MappingOptions &options, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) const
CacheEntrySameKeyDifferentValue(const char *what_arg)
Definition: compilation_cache.h:85
std::string deviceStr
Definition: compilation_cache.h:144
std::vector< CachedEntry > entries_
Definition: compilation_cache.h:160
Definition: compilation_cache.h:383
bool operator==(const std::vector< const DLTensor * > &inputsTensor, const std::vector< detail::TensorInfo > &inputsInfo)
Definition: compilation_cache.h:148
static std::shared_ptr< OptionsCache > & getGlobalSharedCache()
static std::shared_ptr< CC > getCache()
Definition: compilation_cache-inl.h:39
std::mutex mtx_
Definition: compilation_cache.h:78
std::vector< detail::TensorInfo > outputs
Definition: compilation_cache.h:273
Definition: compilation_cache.h:229
std::string gitVersion
Definition: compilation_cache.h:145
std::string gitVersion
Definition: compilation_cache.h:275
std::string cudaSource
Definition: compilation_cache.h:407
std::vector< CachedEntry > entries_
Definition: compilation_cache.h:289
Specializing CudaDim to differentiate between Block and Grid sizes.
Definition: mapping_options.h:196
CachedEntry(const std::string &id, const std::string &kernelSpecializedName, const std::vector< int > &kernelParameters, const Grid &grid, const Block &block, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &cudaSource, const std::string &deviceStr)
std::string makeCudaFilename(const std::string &filename)
std::vector< Duration > recordedRuntimes
Definition: compilation_cache.h:282
DLDataType dType
Definition: compilation_cache.h:45
std::vector< detail::TensorInfo > outputs
Definition: compilation_cache.h:401
Values(const MappingOptions &options, Duration runtime)
Definition: compilation_cache.h:278
void recordRuntime(const std::string &id, const MappingOptions &options, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, Duration runtime)
Definition: compilation_cache.h:123
std::vector< int64_t > strides
Definition: compilation_cache.h:43
Definition: compilation_cache.h:324
CachedEntry(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &deviceStr, const MappingOptions &options, Duration runtime)
Key(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &deviceStr, const std::string &gitVersion)
Block block
Definition: compilation_cache.h:153
int numberAttemptedRetrievals
Definition: compilation_cache.h:72
Values values
Definition: compilation_cache.h:156
std::unique_ptr< MappingOptions > retrieveBestOptions(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) const
std::vector< int > parameters
Definition: compilation_cache.h:103
std::vector< detail::TensorInfo > inputs
Definition: compilation_cache.h:142
CachedEntry * searchKernel(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs)
Definition: compilation_cache.h:41
TensorInfoProto toProtobuf() const
CudaCacheProto toProtobuf() const
void clear()
Definition: compilation_cache-inl.h:91
bool operator<(const TensorInfo &t) const
int numberSuccessfulRetrievals
Definition: compilation_cache.h:73
Definition: compilation_cache.h:58
MappingOptions mappingOptions
Definition: compilation_cache.h:141
std::string deviceStr
Definition: compilation_cache.h:274
Grid grid
Definition: compilation_cache.h:410
std::vector< int > kernelParameters
Definition: compilation_cache.h:151
std::string source
Definition: compilation_cache.h:101
std::vector< MappingOptions > retrieveTopKOptions(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, size_t k) const
std::vector< int64_t > shape
Definition: compilation_cache.h:42
std::string id
Definition: compilation_cache.h:399
ManualCudaCacheProto toProtobuf() const
Definition: compilation_cache.h:364
Definition: compilation_cache.h:247