TensorComprehensions/api/compilation__cache_8h_source.html

 #pragma once


 #include <cstdint>

 #include <memory>

 #include <mutex>

 #include <stdexcept>

 #include <string>

 #include <vector>


 #include <dlpack/dlpack.h>


 #include <compcache.pb.h>


 #include "tc/core/mapping_options.h"

 #include "tc/core/utils/cuda_info.h"


 namespace tc {


 namespace detail {

 struct TensorInfo {

   std::vector<int64_t> shape;

   std::vector<int64_t> strides;

   uint64_t alignment;

   DLDataType dType;


   TensorInfo(const DLTensor* t);

   TensorInfo(const TensorInfoProto& buf);


   bool operator==(const DLTensor* t) const;

   bool operator==(const TensorInfo& t) const;

   bool operator<(const TensorInfo& t) const;

   TensorInfoProto toProtobuf() const;

 };

 } // namespace detail


 template <typename CC>

 class Cache {

  public:

   static void enableCache();

   static void disableCache();

   static void dumpCacheToProtobuf(const std::string& filename);

   static void loadCacheFromProtobuf(const std::string& filename);

   template <typename Protobuf>

   static void loadCacheFromProtobuf(const Protobuf& buf);

   static std::shared_ptr<CC> getCache();

   static bool cacheEnabled();


   size_t size() const;

   void clear();


   mutable int numberAttemptedRetrievals = 0;

   mutable int numberSuccessfulRetrievals = 0;

   mutable int numberCacheAttemps = 0;


  protected:

   // XXX:this should be a std or boost shared_mutex

   mutable std::mutex mtx_;

 };


 class CacheEntrySameKeyDifferentValue : public std::invalid_argument {

  public:

   explicit CacheEntrySameKeyDifferentValue(const std::string& what_arg)

       : invalid_argument(what_arg) {}

   explicit CacheEntrySameKeyDifferentValue(const char* what_arg)

       : invalid_argument(what_arg) {}

 };


 class OptionsCache;

 class CudaCache : public Cache<CudaCache> {

  private:

   friend class Cache<CudaCache>;

   using Protobuf = CudaCacheProto;

   static std::shared_ptr<CudaCache>& getGlobalSharedCache();


  public:

   struct RetrievalResult {

     std::string source;

     std::string specializedName;

     std::vector<int> parameters;

     Grid grid;

     Block block;

   };


   struct CachedEntry {

     CachedEntry(

         const std::string& id,

         const std::string& kernelSpecializedName,

         const std::vector<int>& kernelParameters,

         const Grid& grid,

         const Block& block,

         const MappingOptions& mappingOptions,

         const std::vector<const DLTensor*>& inputs,

         const std::vector<const DLTensor*>& outputs,

         const std::string& cudaSource,

         const std::string& deviceStr);


     CachedEntry(const CudaCacheEntryProto& buf);

     CudaCacheEntryProto toProtobuf() const;


     struct Key {

       std::string id;

       MappingOptions mappingOptions;

       std::vector<detail::TensorInfo> inputs;

       std::vector<detail::TensorInfo> outputs;

       std::string deviceStr;

       std::string gitVersion;

     };


     struct Values {

       std::string cudaSource;

       std::string kernelSpecializedName;

       std::vector<int> kernelParameters;

       Grid grid;

       Block block;

     };

     Key key;

     Values values;

   };


  private:

   std::vector<CachedEntry> entries_;


   CachedEntry* searchKernel(

       const std::string& id,

       const MappingOptions& options,

       const std::vector<detail::TensorInfo>& inputs,

       const std::vector<detail::TensorInfo>& outputs);

   CachedEntry* searchKernel(

       const std::string& id,

       const MappingOptions& options,

       const std::vector<const DLTensor*>& inputs,

       const std::vector<const DLTensor*>& outputs);

   const CachedEntry* searchKernel(

       const std::string& id,

       const MappingOptions& options,

       const std::vector<const DLTensor*>& inputs,

       const std::vector<const DLTensor*>& outputs) const;


   // deduces whether C is const or non-const

   template <typename C, typename TensorTy>

   static auto searchKernelImpl(

       C& c,

       const std::string& id,

       const MappingOptions& options,

       const std::vector<TensorTy>& inputs,

       const std::vector<TensorTy>& outputs)

       -> decltype(c.searchKernel(id, options, inputs, outputs));


  public:

   CudaCache() = default;

   CudaCache(const CudaCacheProto& buf);

   CudaCacheProto toProtobuf() const;


   void cacheKernel(

       const std::string& id,

       const MappingOptions& options,

       const std::vector<const DLTensor*>& inputs,

       const std::vector<const DLTensor*>& outputs,

       const std::string& kernelSpecializedName,

       const std::vector<int>& kernelParameters,

       const std::string& cudaSource,

       const Grid& grid,

       const Block& block);


   std::unique_ptr<RetrievalResult> retrieveKernel(

       const std::string& id,

       const MappingOptions& options,

       const std::vector<const DLTensor*>& inputs,

       const std::vector<const DLTensor*>& outputs) const;


   void removeEntriesNotInOptionsCache(const OptionsCache& oc);

 };


 class OptionsCache : public Cache<OptionsCache> {

   friend class Cache<OptionsCache>;

   using Protobuf = OptionsCacheProto;

   static std::shared_ptr<OptionsCache>& getGlobalSharedCache();


  public:

   struct CachedEntry {

     CachedEntry(

         const std::string& id,

         const std::vector<const DLTensor*>& inputs,

         const std::vector<const DLTensor*>& outputs,

         const std::string& deviceStr,

         const MappingOptions& options,

         Duration runtime);

     CachedEntry(const OptionsCacheEntryProto& buf);

     OptionsCacheEntryProto toProtobuf() const;


     struct Key {

       Key(const std::string& id,

           const std::vector<const DLTensor*>& inputs,

           const std::vector<const DLTensor*>& outputs,

           const std::string& deviceStr,

           const std::string& gitVersion);


       Key(const std::string& id,

           std::vector<detail::TensorInfo>&& inputs,

           std::vector<detail::TensorInfo>&& outputs,

           const std::string& deviceStr,

           const std::string& gitVersion);


       std::string id;

       std::vector<detail::TensorInfo> inputs;

       std::vector<detail::TensorInfo> outputs;

       std::string deviceStr;

       std::string gitVersion;

     };


     struct Values {

       Values(const MappingOptions& options, Duration runtime);

       Values(const MappingOptions& options, std::vector<Duration>&& runtimes);

       MappingOptions mappingOptions;

       std::vector<Duration> recordedRuntimes;

     };

     Key key;

     std::vector<Values> values;

   };


  private:

   std::vector<CachedEntry> entries_;


   CachedEntry* searchKernel(

       const std::string& id,

       const std::vector<const DLTensor*>& inputs,

       const std::vector<const DLTensor*>& outputs);

   const CachedEntry* searchKernel(

       const std::string& id,

       const std::vector<const DLTensor*>& input,

       const std::vector<const DLTensor*>& outputs) const;


   // deduces whether C is const or non-const

   template <typename C>

   static auto searchKernelImpl(

       C& c,

       const std::string& id,

       const std::vector<const DLTensor*>& inputs,

       const std::vector<const DLTensor*>& outputs)

       -> decltype(c.searchKernel(id, inputs, outputs));


  public:

   OptionsCache() = default;

   OptionsCache(const OptionsCacheProto& buf);


   decltype(entries_)::const_iterator begin() const;

   decltype(entries_)::const_iterator end() const;


   OptionsCacheProto toProtobuf() const;

   struct RetrievalResult {

     MappingOptions options;

     std::vector<Duration> recordedRuntimes;

   };


   // returns the sum of cache entry sizes (that is a single cache entry can have

   // multiple options and profiling information associated with it)

   size_t totalSize() const;


   void recordRuntime(

       const std::string& id,

       const MappingOptions& options,

       const std::vector<const DLTensor*>& inputs,

       const std::vector<const DLTensor*>& outputs,

       Duration runtime);


   std::vector<RetrievalResult> retrieveOptionsAndRuntimes(

       const std::string& id,

       const std::vector<const DLTensor*>& inputs,

       const std::vector<const DLTensor*>& outputs) const;


   std::unique_ptr<MappingOptions> retrieveBestOptions(

       const std::string& id,

       const std::vector<const DLTensor*>& inputs,

       const std::vector<const DLTensor*>& outputs) const;


   std::vector<MappingOptions> retrieveTopKOptions(

       const std::string& id,

       const std::vector<const DLTensor*>& inputs,

       const std::vector<const DLTensor*>& outputs,

       size_t k) const;


   // Only (up to) numberToKeep entries per operation (combination of id and

   // input info) are kept in the cache. The best performing versions are kept

   void keepOnlyBestCandidates(size_t numberToKeep);

 };


 /*

  * ManualCudaCache stores the manually injected source of Cuda kernels

  */

 class ManualCudaCache : public Cache<ManualCudaCache> {

  private:

   friend class Cache<ManualCudaCache>;

   using Protobuf = ManualCudaCacheProto;

   static std::shared_ptr<ManualCudaCache>& getGlobalSharedCache();


  public:

   /*

    *A CudaCache holds multiple CachedEntry's.

    *Each CachedEntry is split to two conceptual parts the key and the values.

    *The values are:

    *                 the specialized (wrt inputs) Cuda source code,

    *                 the Cuda block and grid dimensions

    *The key is:

    *                 the kernel/op's unique id (string),

    *                 the specialized input dimensions,

    *                 the target architecture (string),

    *                 tc's version (string),

    */

   struct CachedEntry {

     CachedEntry(

         const std::string& id,

         const std::string& kernelSpecializedName,

         const std::vector<int>& kernelParameters,

         const Grid& grid,

         const Block& block,

         const std::vector<const DLTensor*>& inputs,

         const std::vector<const DLTensor*>& outputs,

         const std::string& cudaSource,

         const std::string& deviceStr);


     CachedEntry(const ManualCudaCacheEntryProto& buf);

     ManualCudaCacheEntryProto toProtobuf() const;


     struct Key {

       std::string id;

       std::vector<detail::TensorInfo> inputs;

       std::vector<detail::TensorInfo> outputs;

       std::string deviceStr;

       std::string gitVersion;

     };


     struct Values {

       std::string cudaSource;

       std::string kernelSpecializedName;

       std::vector<int> kernelParameters;

       Grid grid;

       Block block;

     };

     Key key;

     Values values;

   };


  private:

   std::vector<CachedEntry> entries_;


   /*

    *SearchKernel (through SearchKernelImpl) searches op in the cache

    *if a cached entry that corresponds to the op's TargetDevice and the

    *shape of inputs matches it is returned

    */

   CachedEntry* searchKernel(

       const std::string& id,

       const std::vector<detail::TensorInfo>& inputs,

       const std::vector<detail::TensorInfo>& outputs);

   CachedEntry* searchKernel(

       const std::string& id,

       const std::vector<const DLTensor*>& inputs,

       const std::vector<const DLTensor*>& outputs);

   const CachedEntry* searchKernel(

       const std::string& id,

       const std::vector<const DLTensor*>& inputs,

       const std::vector<const DLTensor*>& outputs) const;


   // deduces whether C is const or non-const

   template <typename C, typename InputTy>

   static auto searchKernelImpl(

       C& c,

       const std::string& id,

       const std::vector<InputTy>& inputs,

       const std::vector<InputTy>& outputs)

       -> decltype(c.searchKernel(id, inputs, outputs));


  public:

   ManualCudaCache() = default;

   ManualCudaCache(const ManualCudaCacheProto& buf);

   ManualCudaCacheProto toProtobuf() const;


   /*

    *Stores (cudaSource, grid, block, specializedName, parameters)

    *in the cache with key (id, input shapes, output shapes,

    *target device). If the key already exist in the cache,

    *the values are replaced.

    */

   void cacheKernel(

       const std::string& id,

       const std::vector<const DLTensor*>& inputs,

       const std::vector<const DLTensor*>& outputs,

       const std::string& kernelSpecializedName,

       const std::vector<int>& kernelParameters,

       const std::string& cudaSource,

       const Grid& grid,

       const Block& block);


   /*

    *Returns the cache entry that matches

    *op(id, target device) and inputs' shapes.

    */

   std::unique_ptr<CudaCache::RetrievalResult> retrieveKernel(

       const std::string& id,

       const std::vector<const DLTensor*>& inputs,

       const std::vector<const DLTensor*>& outputs) const;

 };


 void removeFromCudaCacheEntriesNotInOptionsCache(

     CudaCache& cc,

     const OptionsCache& oc);


 bool operator==(

     const std::vector<const DLTensor*>& inputsTensor,

     const std::vector<detail::TensorInfo>& inputsInfo);


 std::string makeOptionsFilename(const std::string& filename);


 std::string makeCudaFilename(const std::string& filename);


 } // namespace tc

 #include "tc/core/compilation_cache-inl.h"

tc::ManualCudaCache::CachedEntry::Key::inputs
std::vector< detail::TensorInfo > inputs
Definition: compilation_cache.h:400

tc::makeOptionsFilename
std::string makeOptionsFilename(const std::string &filename)

tc::CudaCache::Protobuf
CudaCacheProto Protobuf
Definition: compilation_cache.h:96

tc::CudaCache::CachedEntry::Key::id
std::string id
Definition: compilation_cache.h:140

tc::ManualCudaCache::searchKernel
CachedEntry * searchKernel(const std::string &id, const std::vector< detail::TensorInfo > &inputs, const std::vector< detail::TensorInfo > &outputs)

tc::ManualCudaCache::CachedEntry::Key
Definition: compilation_cache.h:398

tc::OptionsCache::CachedEntry::Key
Definition: compilation_cache.h:258

compilation_cache-inl.h

tc::ManualCudaCache::Protobuf
ManualCudaCacheProto Protobuf
Definition: compilation_cache.h:367

tc::OptionsCache::RetrievalResult::options
MappingOptions options
Definition: compilation_cache.h:325

tc::CudaCache::RetrievalResult
Definition: compilation_cache.h:100

tc::Cache::cacheEnabled
static bool cacheEnabled()
Definition: compilation_cache-inl.h:80

tc::Grid
Specializing CudaDim to differentiate between Block and Grid sizes.
Definition: mapping_options.h:208

tc::OptionsCache::keepOnlyBestCandidates
void keepOnlyBestCandidates(size_t numberToKeep)

tc::CudaCache::CachedEntry::CachedEntry
CachedEntry(const std::string &id, const std::string &kernelSpecializedName, const std::vector< int > &kernelParameters, const Grid &grid, const Block &block, const MappingOptions &mappingOptions, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &cudaSource, const std::string &deviceStr)

tc::CudaCache::CachedEntry::Values::kernelSpecializedName
std::string kernelSpecializedName
Definition: compilation_cache.h:150

tc::ManualCudaCache::CachedEntry::Values
Definition: compilation_cache.h:406

tc::removeFromCudaCacheEntriesNotInOptionsCache
void removeFromCudaCacheEntriesNotInOptionsCache(CudaCache &cc, const OptionsCache &oc)

tc::CudaCache
Definition: compilation_cache.h:93

tc::OptionsCache::CachedEntry::Values::mappingOptions
MappingOptions mappingOptions
Definition: compilation_cache.h:281

tc::Cache::dumpCacheToProtobuf
static void dumpCacheToProtobuf(const std::string &filename)
Definition: compilation_cache-inl.h:48

tc::CudaCache::searchKernel
CachedEntry * searchKernel(const std::string &id, const MappingOptions &options, const std::vector< detail::TensorInfo > &inputs, const std::vector< detail::TensorInfo > &outputs)

tc::OptionsCache::OptionsCache
OptionsCache()=default

tc::CudaCache::CachedEntry::Values::cudaSource
std::string cudaSource
Definition: compilation_cache.h:149

tc::ManualCudaCache::CachedEntry::Key::deviceStr
std::string deviceStr
Definition: compilation_cache.h:402

tc::ManualCudaCache::searchKernelImpl
static auto searchKernelImpl(C &c, const std::string &id, const std::vector< InputTy > &inputs, const std::vector< InputTy > &outputs) -> decltype(c.searchKernel(id, inputs, outputs))

tc::OptionsCache::CachedEntry::Key::inputs
std::vector< detail::TensorInfo > inputs
Definition: compilation_cache.h:272

tc::ManualCudaCache::CachedEntry::Values::block
Block block
Definition: compilation_cache.h:411

tc::OptionsCache::searchKernelImpl
static auto searchKernelImpl(C &c, const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) -> decltype(c.searchKernel(id, inputs, outputs))
Definition: compilation_cache-inl.h:131

tc::ManualCudaCache::CachedEntry::toProtobuf
ManualCudaCacheEntryProto toProtobuf() const

tc::CudaCache::cacheKernel
void cacheKernel(const std::string &id, const MappingOptions &options, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &kernelSpecializedName, const std::vector< int > &kernelParameters, const std::string &cudaSource, const Grid &grid, const Block &block)

tc::CacheEntrySameKeyDifferentValue
Definition: compilation_cache.h:81

tc::detail::TensorInfo::operator==
bool operator==(const DLTensor *t) const

tc::Cache::loadCacheFromProtobuf
static void loadCacheFromProtobuf(const std::string &filename)
Definition: compilation_cache-inl.h:60

tc::CudaCache::RetrievalResult::grid
Grid grid
Definition: compilation_cache.h:104

tc::OptionsCache::end
decltype(entries_)::const_iterator end() const

tc::OptionsCache::Protobuf
OptionsCacheProto Protobuf
Definition: compilation_cache.h:231

tc::CudaCache::CachedEntry::key
Key key
Definition: compilation_cache.h:155

tc::CudaCache::getGlobalSharedCache
static std::shared_ptr< CudaCache > & getGlobalSharedCache()

tc::CudaCache::removeEntriesNotInOptionsCache
void removeEntriesNotInOptionsCache(const OptionsCache &oc)

tc::CudaCache::RetrievalResult::block
Block block
Definition: compilation_cache.h:105

tc::Cache::size
size_t size() const
Definition: compilation_cache-inl.h:85

tc::CudaCache::CachedEntry::Key::outputs
std::vector< detail::TensorInfo > outputs
Definition: compilation_cache.h:143

tc::CacheEntrySameKeyDifferentValue::CacheEntrySameKeyDifferentValue
CacheEntrySameKeyDifferentValue(const std::string &what_arg)
Definition: compilation_cache.h:83

tc::CudaCache::CachedEntry::Key
Definition: compilation_cache.h:139

tc::ManualCudaCache::CachedEntry::key
Key key
Definition: compilation_cache.h:413

tc::detail::TensorInfo::alignment
uint64_t alignment
Definition: compilation_cache.h:44

tc::CudaCache::CachedEntry::Values::grid
Grid grid
Definition: compilation_cache.h:152

tc::Cache::disableCache
static void disableCache()
Definition: compilation_cache-inl.h:34

tc::OptionsCache::begin
decltype(entries_)::const_iterator begin() const

tc::CudaCache::RetrievalResult::specializedName
std::string specializedName
Definition: compilation_cache.h:102

tc::OptionsCache::CachedEntry::key
Key key
Definition: compilation_cache.h:284

tc::ManualCudaCache::ManualCudaCache
ManualCudaCache()=default

tc::OptionsCache::RetrievalResult::recordedRuntimes
std::vector< Duration > recordedRuntimes
Definition: compilation_cache.h:326

tc::ManualCudaCache::retrieveKernel
std::unique_ptr< CudaCache::RetrievalResult > retrieveKernel(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) const

tc::ManualCudaCache::CachedEntry::Values::kernelSpecializedName
std::string kernelSpecializedName
Definition: compilation_cache.h:408

tc::OptionsCache::CachedEntry::Key::id
std::string id
Definition: compilation_cache.h:271

tc::Cache::enableCache
static void enableCache()
Definition: compilation_cache-inl.h:29

tc::CudaCache::searchKernelImpl
static auto searchKernelImpl(C &c, const std::string &id, const MappingOptions &options, const std::vector< TensorTy > &inputs, const std::vector< TensorTy > &outputs) -> decltype(c.searchKernel(id, options, inputs, outputs))

tc::ManualCudaCache::cacheKernel
void cacheKernel(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &kernelSpecializedName, const std::vector< int > &kernelParameters, const std::string &cudaSource, const Grid &grid, const Block &block)

tc::MappingOptions
Definition: mapping_options.h:336

tc::OptionsCache::toProtobuf
OptionsCacheProto toProtobuf() const

tc::Duration
std::chrono::high_resolution_clock::duration Duration
Definition: rtc.h:31

tc::CudaCache::CudaCache
CudaCache()=default

tc::ManualCudaCache::CachedEntry::Values::kernelParameters
std::vector< int > kernelParameters
Definition: compilation_cache.h:409

tc::OptionsCache::retrieveOptionsAndRuntimes
std::vector< RetrievalResult > retrieveOptionsAndRuntimes(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) const

tc::OptionsCache::CachedEntry::values
std::vector< Values > values
Definition: compilation_cache.h:285

tc::OptionsCache::CachedEntry::toProtobuf
OptionsCacheEntryProto toProtobuf() const

tc::detail::TensorInfo::TensorInfo
TensorInfo(const DLTensor *t)

tc::OptionsCache::totalSize
size_t totalSize() const

tc::ManualCudaCache::entries_
std::vector< CachedEntry > entries_
Definition: compilation_cache.h:418

tc::ManualCudaCache::getGlobalSharedCache
static std::shared_ptr< ManualCudaCache > & getGlobalSharedCache()

tc::ManualCudaCache::CachedEntry::Key::gitVersion
std::string gitVersion
Definition: compilation_cache.h:403

tc::ManualCudaCache::CachedEntry::values
Values values
Definition: compilation_cache.h:414

tc::Cache::numberCacheAttemps
int numberCacheAttemps
Definition: compilation_cache.h:74

tc::CudaCache::CachedEntry::toProtobuf
CudaCacheEntryProto toProtobuf() const

tc::CudaCache::retrieveKernel
std::unique_ptr< RetrievalResult > retrieveKernel(const std::string &id, const MappingOptions &options, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) const

tc::CacheEntrySameKeyDifferentValue::CacheEntrySameKeyDifferentValue
CacheEntrySameKeyDifferentValue(const char *what_arg)
Definition: compilation_cache.h:85

tc::CudaCache::CachedEntry::Key::deviceStr
std::string deviceStr
Definition: compilation_cache.h:144

tc::CudaCache::entries_
std::vector< CachedEntry > entries_
Definition: compilation_cache.h:160

tc::ManualCudaCache::CachedEntry
Definition: compilation_cache.h:383

tc::operator==
bool operator==(const std::vector< const DLTensor * > &inputsTensor, const std::vector< detail::TensorInfo > &inputsInfo)

tc::CudaCache::CachedEntry::Values
Definition: compilation_cache.h:148

tc::OptionsCache::getGlobalSharedCache
static std::shared_ptr< OptionsCache > & getGlobalSharedCache()

tc::Cache::getCache
static std::shared_ptr< CC > getCache()
Definition: compilation_cache-inl.h:39

tc::Cache::mtx_
std::mutex mtx_
Definition: compilation_cache.h:78

tc::OptionsCache::CachedEntry::Key::outputs
std::vector< detail::TensorInfo > outputs
Definition: compilation_cache.h:273

tc::OptionsCache
Definition: compilation_cache.h:229

tc::CudaCache::CachedEntry::Key::gitVersion
std::string gitVersion
Definition: compilation_cache.h:145

tc::OptionsCache::CachedEntry::Key::gitVersion
std::string gitVersion
Definition: compilation_cache.h:275

tc::ManualCudaCache::CachedEntry::Values::cudaSource
std::string cudaSource
Definition: compilation_cache.h:407

cuda_info.h

tc::OptionsCache::entries_
std::vector< CachedEntry > entries_
Definition: compilation_cache.h:289

tc::Block
Specializing CudaDim to differentiate between Block and Grid sizes.
Definition: mapping_options.h:196

tc::ManualCudaCache::CachedEntry::CachedEntry
CachedEntry(const std::string &id, const std::string &kernelSpecializedName, const std::vector< int > &kernelParameters, const Grid &grid, const Block &block, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &cudaSource, const std::string &deviceStr)

tc::makeCudaFilename
std::string makeCudaFilename(const std::string &filename)

tc::OptionsCache::CachedEntry::Values::recordedRuntimes
std::vector< Duration > recordedRuntimes
Definition: compilation_cache.h:282

tc::detail::TensorInfo::dType
DLDataType dType
Definition: compilation_cache.h:45

tc::ManualCudaCache::CachedEntry::Key::outputs
std::vector< detail::TensorInfo > outputs
Definition: compilation_cache.h:401

tc::OptionsCache::CachedEntry::Values::Values
Values(const MappingOptions &options, Duration runtime)

tc::OptionsCache::CachedEntry::Values
Definition: compilation_cache.h:278

tc::OptionsCache::recordRuntime
void recordRuntime(const std::string &id, const MappingOptions &options, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, Duration runtime)

tc::CudaCache::CachedEntry
Definition: compilation_cache.h:123

tc::detail::TensorInfo::strides
std::vector< int64_t > strides
Definition: compilation_cache.h:43

tc::OptionsCache::RetrievalResult
Definition: compilation_cache.h:324

tc::OptionsCache::CachedEntry::CachedEntry
CachedEntry(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &deviceStr, const MappingOptions &options, Duration runtime)

tc::OptionsCache::CachedEntry::Key::Key
Key(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &deviceStr, const std::string &gitVersion)

tc::CudaCache::CachedEntry::Values::block
Block block
Definition: compilation_cache.h:153

tc::Cache::numberAttemptedRetrievals
int numberAttemptedRetrievals
Definition: compilation_cache.h:72

tc::CudaCache::CachedEntry::values
Values values
Definition: compilation_cache.h:156

tc::OptionsCache::retrieveBestOptions
std::unique_ptr< MappingOptions > retrieveBestOptions(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) const

tc::CudaCache::RetrievalResult::parameters
std::vector< int > parameters
Definition: compilation_cache.h:103

tc::CudaCache::CachedEntry::Key::inputs
std::vector< detail::TensorInfo > inputs
Definition: compilation_cache.h:142

tc::OptionsCache::searchKernel
CachedEntry * searchKernel(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs)

tc::detail::TensorInfo
Definition: compilation_cache.h:41

mapping_options.h

tc::detail::TensorInfo::toProtobuf
TensorInfoProto toProtobuf() const

tc::CudaCache::toProtobuf
CudaCacheProto toProtobuf() const

tc::Cache::clear
void clear()
Definition: compilation_cache-inl.h:91

tc::detail::TensorInfo::operator<
bool operator<(const TensorInfo &t) const

tc::Cache::numberSuccessfulRetrievals
int numberSuccessfulRetrievals
Definition: compilation_cache.h:73

tc::Cache
Definition: compilation_cache.h:58

tc::CudaCache::CachedEntry::Key::mappingOptions
MappingOptions mappingOptions
Definition: compilation_cache.h:141

tc::OptionsCache::CachedEntry::Key::deviceStr
std::string deviceStr
Definition: compilation_cache.h:274

tc::ManualCudaCache::CachedEntry::Values::grid
Grid grid
Definition: compilation_cache.h:410

tc::CudaCache::CachedEntry::Values::kernelParameters
std::vector< int > kernelParameters
Definition: compilation_cache.h:151

tc::CudaCache::RetrievalResult::source
std::string source
Definition: compilation_cache.h:101

tc::OptionsCache::retrieveTopKOptions
std::vector< MappingOptions > retrieveTopKOptions(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, size_t k) const

tc::detail::TensorInfo::shape
std::vector< int64_t > shape
Definition: compilation_cache.h:42

tc::ManualCudaCache::CachedEntry::Key::id
std::string id
Definition: compilation_cache.h:399

tc::ManualCudaCache::toProtobuf
ManualCudaCacheProto toProtobuf() const

tc::ManualCudaCache
Definition: compilation_cache.h:364

tc::OptionsCache::CachedEntry
Definition: compilation_cache.h:247