#include <compilation_cache.h>

Member functions:

    CudaCache() = default

    CudaCache(const CudaCacheProto& buf)

    CudaCacheProto toProtobuf() const

    void cacheKernel(const std::string& id,
                     const MappingOptions& options,
                     const std::vector<const DLTensor*>& inputs,
                     const std::vector<const DLTensor*>& outputs,
                     const std::string& kernelSpecializedName,
                     const std::vector<int>& kernelParameters,
                     const std::string& cudaSource,
                     const Grid& grid,
                     const Block& block)

    std::unique_ptr<RetrievalResult> retrieveKernel(const std::string& id,
                     const MappingOptions& options,
                     const std::vector<const DLTensor*>& inputs,
                     const std::vector<const DLTensor*>& outputs) const

    void removeEntriesNotInOptionsCache(const OptionsCache& oc)

    template<typename C, typename InputTy>
    auto searchKernelImpl(C& c, const std::string& id,
                          const MappingOptions& options,
                          const std::vector<InputTy>& inputs,
                          const std::vector<InputTy>& outputs)
        -> decltype(c.searchKernel(id, options, inputs, outputs))

    size_t size() const

    void clear()

Static member functions:

    static std::shared_ptr<CudaCache>& getGlobalSharedCache()

    template<typename C, typename TensorTy>
    static auto searchKernelImpl(C& c, const std::string& id,
                                 const MappingOptions& options,
                                 const std::vector<TensorTy>& inputs,
                                 const std::vector<TensorTy>& outputs)
        -> decltype(c.searchKernel(id, options, inputs, outputs))

CudaCache stores the CUDA source of optimized kernels.

tc::CudaCache::CudaCache()  [default]

tc::CudaCache::CudaCache(const CudaCacheProto& buf)

void tc::CudaCache::cacheKernel(
    const std::string& id,
    const MappingOptions& options,
    const std::vector<const DLTensor*>& inputs,
    const std::vector<const DLTensor*>& outputs,
    const std::string& kernelSpecializedName,
    const std::vector<int>& kernelParameters,
    const std::string& cudaSource,
    const Grid& grid,
    const Block& block)

If the op was previously cached and the inputs' shapes, isl options, and target device are the same, this is a no-op. Otherwise, (cudaSource, grid, block) is stored in the cache.
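
A minimal sketch of recording a compiled kernel, assuming the tensors, mapping options, generated CUDA source, and launch configuration come from an earlier compilation step. The function name, the id "matmul", the specialized name, the parameter values, and the tc:: qualification of MappingOptions, Grid, and Block are assumptions, not taken from this page:

    #include <string>
    #include <vector>
    #include <compilation_cache.h>   // declares tc::CudaCache (per this page)

    // Hedged sketch: store the CUDA source for an op so that a later
    // retrieveKernel call with the same id, options, input shapes, and
    // device is a cache hit.
    void recordCompiledKernel(tc::CudaCache& cache,
                              const tc::MappingOptions& options,
                              const std::vector<const DLTensor*>& inputPtrs,
                              const std::vector<const DLTensor*>& outputPtrs,
                              const std::string& cudaSrc,
                              const tc::Grid& grid,
                              const tc::Block& block) {
      cache.cacheKernel("matmul",              // id of the op being cached (hypothetical)
                        options,               // mapping options used for this compilation
                        inputPtrs,             // input shapes are part of the cache key
                        outputPtrs,
                        "matmul_512_512_512",  // specialized kernel name (hypothetical)
                        {512, 512, 512},       // kernel parameters (hypothetical)
                        cudaSrc,               // generated CUDA source to store
                        grid,                  // launch grid
                        block);                // launch block
    }

Calling this again with the same id, options, input shapes, and target device is a no-op per the description above.
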
static std::shared_ptr<CudaCache>& tc::CudaCache::getGlobalSharedCache()  [static, private]

void tc::CudaCache::removeEntriesNotInOptionsCache(const OptionsCache& oc)

std::unique_ptr<RetrievalResult> tc::CudaCache::retrieveKernel(
    const std::string& id,
    const MappingOptions& options,
    const std::vector<const DLTensor*>& inputs,
    const std::vector<const DLTensor*>& outputs) const

Returns the cache entry that matches the op (id, isl options, target device) and the inputs' shapes.
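
A minimal sketch of a lookup, assuming an existing cache, the same id, options, and tensor shapes that were used when caching, and that a null unique_ptr signals a miss (an assumption based on the return type, not stated on this page):

    // Hedged sketch: look up a previously cached kernel.
    auto cached = cache.retrieveKernel("matmul", options, inputPtrs, outputPtrs);
    if (!cached) {
      // Miss: no entry for this (id, options, input shapes, device);
      // compile from scratch and record it with cacheKernel().
    } else {
      // Hit: reuse the stored CUDA source and launch configuration
      // instead of recompiling.
    }
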
CachedEntry* tc::CudaCache::searchKernel(
    const std::string& id,
    const MappingOptions& options,
    const std::vector<const DLTensor*>& inputs,
    const std::vector<const DLTensor*>& outputs)  [private]

const CachedEntry* tc::CudaCache::searchKernel(
    const std::string& id,
    const MappingOptions& options,
    const std::vector<const DLTensor*>& inputs,
    const std::vector<const DLTensor*>& outputs) const  [private]

searchKernel (through searchKernelImpl) searches for the op in the cache; if a cached entry matches the op's configuration (MappingOptions and target device) and the shapes of the inputs, it is returned.

template<typename C, typename InputTy>
auto tc::CudaCache::searchKernelImpl(
    C& c,
    const std::string& id,
    const MappingOptions& options,
    const std::vector<InputTy>& inputs,
    const std::vector<InputTy>& outputs)
    -> decltype(c.searchKernel(id, options, inputs, outputs))

template<typename C, typename TensorTy>
static auto tc::CudaCache::searchKernelImpl(
    C& c,
    const std::string& id,
    const MappingOptions& options,
    const std::vector<TensorTy>& inputs,
    const std::vector<TensorTy>& outputs)
    -> decltype(c.searchKernel(id, options, inputs, outputs))  [static, private]

CudaCacheProto tc::CudaCache::toProtobuf() const
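
Together with the CudaCache(const CudaCacheProto&) constructor, toProtobuf() allows the cache to be serialized and rebuilt later. A minimal sketch, assuming CudaCacheProto is an ordinary protobuf message exposing the standard SerializeToString/ParseFromString API and that `cache` is an existing tc::CudaCache:

    // Hedged sketch: round-trip an existing tc::CudaCache `cache` through
    // its protobuf representation (e.g. to persist it between runs).
    std::string buf;
    cache.toProtobuf().SerializeToString(&buf);   // CudaCache -> bytes

    tc::CudaCacheProto proto;
    proto.ParseFromString(buf);                   // bytes -> CudaCacheProto
    tc::CudaCache restored(proto);                // rebuild the cache from the message
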