TensorComprehensions/api/compilation__cache-inl_8h_source.html

 #pragma once


 #include <sys/stat.h>

 #include <algorithm>

 #include <fstream>

 #include <string>


 #include <glog/logging.h>

 #include <version.h>


 namespace tc {


 // Installs a freshly default-constructed CC as the shared cache, replacing
 // whatever CC::getGlobalSharedCache() held before.
 template <typename CC>
 void Cache<CC>::enableCache() {
   std::shared_ptr<CC> freshCache = std::make_shared<CC>();
   CC::getGlobalSharedCache() = std::move(freshCache);
 }


 // Drops the shared cache so that cacheEnabled() reports false afterwards.
 template <typename CC>
 void Cache<CC>::disableCache() {
   // Resetting the shared pointer is equivalent to assigning nullptr to it.
   CC::getGlobalSharedCache().reset();
 }


 // Returns the shared cache held by CC::getGlobalSharedCache().
 // Throws std::runtime_error when no cache is installed, i.e. when
 // cacheEnabled() is false.
 template <typename CC>
 std::shared_ptr<CC> Cache<CC>::getCache() {
   if (cacheEnabled()) {
     return CC::getGlobalSharedCache();
   }
   throw std::runtime_error(
       "EnableCache or LoadCacheFromProtobuf must be called before using the cache.");
 }


 // Serializes the current cache (via toProtobuf()) into `filename`,
 // truncating any existing file.
 //
 // Failures to open the file or to serialize the message are reported with
 // LOG(ERROR); they are not propagated to the caller. getCache() still throws
 // std::runtime_error if no cache is enabled.
 template <typename CC>
 void Cache<CC>::dumpCacheToProtobuf(const std::string& filename) {
   std::fstream serialized(
       filename, std::ios::binary | std::ios::trunc | std::ios::out);
   if (!serialized) {
     LOG(ERROR) << "Failed to open the output stream for dumping protobuf: "
                << filename;
     return;
   }
   // SerializePartialToOstream reports failure through its boolean result;
   // surface it instead of silently leaving a truncated/incomplete file.
   const bool written =
       getCache()->toProtobuf().SerializePartialToOstream(&serialized);
   if (!written) {
     LOG(ERROR) << "Failed to serialize the cache protobuf to: " << filename;
   }
 }


 // Replaces the shared cache with one built from the protobuf stored in
 // `filename`.
 //
 // If stat() fails for `filename` (e.g. the file does not exist), the cache is
 // built from a default-constructed protobuf. Open and parse failures are
 // reported with LOG(ERROR); the cache is still built from whatever `buf`
 // holds at that point, which matches the previous behavior.
 template <typename CC>
 void Cache<CC>::loadCacheFromProtobuf(const std::string& filename) {
   typename CC::Protobuf buf;
   struct stat buffer = {0};
   if (stat(filename.c_str(), &buffer) == 0) {
     std::ifstream serialized(filename, std::ios::binary);
     if (!serialized) {
       LOG(ERROR) << "Failed to open the input stream for loading protobuf: "
                  << filename;
     } else if (!buf.ParseFromIstream(&serialized)) {
       // ParseFromIstream signals malformed or incomplete input through its
       // boolean result; do not let a corrupt cache file go unnoticed.
       LOG(ERROR) << "Failed to parse the cache protobuf from: " << filename;
     }
   }
   loadCacheFromProtobuf(buf);
 }


 // Replaces the shared cache with a CC constructed from `buf`.
 // Protobuf must be exactly CC::Protobuf; any other type is rejected at
 // compile time.
 template <typename CC>
 template <typename Protobuf>
 void Cache<CC>::loadCacheFromProtobuf(const Protobuf& buf) {
   static_assert(
       std::is_same<Protobuf, typename CC::Protobuf>::value,
       "LoadCacheFromProtobuf called with invalid protobuf type.");
   CC::getGlobalSharedCache() = std::make_shared<CC>(buf);
 }


 // Reports whether CC::getGlobalSharedCache() currently holds a cache.
 template <typename CC>
 bool Cache<CC>::cacheEnabled() {
   const auto& sharedCache = CC::getGlobalSharedCache();
   return sharedCache != nullptr;
 }


 // Returns the number of entries stored in the derived cache's entries_
 // container, read while holding mtx_.
 template <typename CC>
 size_t Cache<CC>::size() const {
   // Cache is used as a CRTP-style base: entries_ lives in the derived CC.
   const CC& derived = *static_cast<const CC*>(this);
   std::lock_guard<std::mutex> guard(mtx_);
   return derived.entries_.size();
 }


 // Resets the retrieval/attempt counters to zero and removes every entry from
 // the derived cache's entries_ container, all while holding mtx_.
 template <typename CC>
 void Cache<CC>::clear() {
   std::lock_guard<std::mutex> guard(mtx_);
   numberCacheAttemps = 0;
   numberSuccessfulRetrievals = 0;
   numberAttemptedRetrievals = 0;
   CC& derived = *static_cast<CC*>(this);
   derived.entries_.clear();
 }


 // Looks up the first entry of c.entries_ whose key matches `id`, `options`,
 // `inputs`, `outputs` and the device string reported by
 // CudaGPUInfo::GPUInfo().GetCudaDeviceStr().
 //
 // C is deduced (const or non-const), and the return type is whatever
 // c.searchKernel(id, options, inputs, outputs) returns for that C.
 // Returns the address of the matching entry, or nullptr when none matches.
 // A git-version mismatch only prints a warning to std::cerr; the entry is
 // returned regardless.
 template <typename C, typename InputTy>
 auto CudaCache::searchKernelImpl(
     C& c,
     const std::string& id,
     const MappingOptions& options,
     const std::vector<InputTy>& inputs,
     const std::vector<InputTy>& outputs)
     -> decltype(c.searchKernel(id, options, inputs, outputs)) {
   auto gpuStr = CudaGPUInfo::GPUInfo().GetCudaDeviceStr();
   auto matchesKey = [&](const CachedEntry& entry) {
     using tc::operator==;
     return id == entry.key.id && options == entry.key.mappingOptions &&
         inputs == entry.key.inputs && outputs == entry.key.outputs &&
         gpuStr == entry.key.deviceStr;
   };
   auto found = std::find_if(c.entries_.begin(), c.entries_.end(), matchesKey);
   if (found == c.entries_.end()) {
     return nullptr;
   }
   if (found->key.gitVersion != tc::git_version) {
     std::cerr << "Proto version doesn't match. TC git version is: "
               << tc::git_version
               << " and Proto version is: " << found->key.gitVersion
               << " .This proto might be incompatible"
               << " with your TC binary and can break. Please autotune"
               << " against the correct TC version." << std::endl;
   }
   return &*found;
 }


 // Looks up the first entry of c.entries_ whose key matches `id`, `inputs`,
 // `outputs` and the device string reported by
 // CudaGPUInfo::GPUInfo().GetCudaDeviceStr().
 //
 // C is deduced (const or non-const), and the return type is whatever
 // c.searchKernel(id, inputs, outputs) returns for that C.
 // Returns the address of the matching entry, or nullptr when none matches.
 // A git-version mismatch only prints a warning to std::cerr; the entry is
 // returned regardless.
 template <typename C>
 auto OptionsCache::searchKernelImpl(
     C& c,
     const std::string& id,
     const std::vector<const DLTensor*>& inputs,
     const std::vector<const DLTensor*>& outputs)
     -> decltype(c.searchKernel(id, inputs, outputs)) {
   auto gpuStr = CudaGPUInfo::GPUInfo().GetCudaDeviceStr();
   auto matchesKey = [&](const CachedEntry& entry) {
     using tc::operator==;
     return id == entry.key.id && inputs == entry.key.inputs &&
         outputs == entry.key.outputs && gpuStr == entry.key.deviceStr;
   };
   auto found = std::find_if(c.entries_.begin(), c.entries_.end(), matchesKey);
   if (found == c.entries_.end()) {
     return nullptr;
   }
   if (found->key.gitVersion != tc::git_version) {
     std::cerr << "Proto version doesn't match. TC git version is: "
               << tc::git_version
               << " and Proto version is: " << found->key.gitVersion
               << " .This proto might be incompatible"
               << " with your TC binary and can break. Please autotune"
               << " against the correct TC version." << std::endl;
   }
   return &*found;
 }


 // Looks up the first entry of c.entries_ whose key matches `id`, `inputs`,
 // `outputs` and the device string reported by
 // CudaGPUInfo::GPUInfo().GetCudaDeviceStr().
 //
 // C is deduced (const or non-const), and the return type is whatever
 // c.searchKernel(id, inputs, outputs) returns for that C.
 // Returns the address of the matching entry, or nullptr when none matches.
 // A git-version mismatch only prints a warning to std::cerr; the entry is
 // returned regardless.
 template <typename C, typename TensorTy>
 auto ManualCudaCache::searchKernelImpl(
     C& c,
     const std::string& id,
     const std::vector<TensorTy>& inputs,
     const std::vector<TensorTy>& outputs)
     -> decltype(c.searchKernel(id, inputs, outputs)) {
   auto gpuStr = CudaGPUInfo::GPUInfo().GetCudaDeviceStr();
   // The lambda parameter is named `entry` so it does not shadow the cache `c`.
   auto it = std::find_if(
       c.entries_.begin(), c.entries_.end(), [&](const CachedEntry& entry) {
         using tc::operator==;
         return id == entry.key.id && inputs == entry.key.inputs &&
             outputs == entry.key.outputs && gpuStr == entry.key.deviceStr;
       });
   if (it != c.entries_.end()) {
     // No unconditional stdout trace on a hit: the lookup stays silent unless
     // the versions differ, like the other searchKernelImpl implementations.
     if (it->key.gitVersion != tc::git_version) {
       std::cerr << "Proto version doesn't match. TC git version is: "
                 << tc::git_version
                 << " and Proto version is: " << it->key.gitVersion
                 << " .This proto might be incompatible"
                 << " with your TC binary and can break. Please autotune"
                 << " against the correct TC version." << std::endl;
     }
     return &*it;
   }
   return nullptr;
 }


 } // namespace tc

tc::CudaGPUInfo::GPUInfo
static CudaGPUInfo & GPUInfo()

tc::Cache::cacheEnabled
static bool cacheEnabled()
Definition: compilation_cache-inl.h:80

tc::Cache::dumpCacheToProtobuf
static void dumpCacheToProtobuf(const std::string &filename)
Definition: compilation_cache-inl.h:48

tc::ManualCudaCache::searchKernelImpl
static auto searchKernelImpl(C &c, const std::string &id, const std::vector< InputTy > &inputs, const std::vector< InputTy > &outputs) -> decltype(c.searchKernel(id, inputs, outputs))

tc::OptionsCache::searchKernelImpl
static auto searchKernelImpl(C &c, const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) -> decltype(c.searchKernel(id, inputs, outputs))
Definition: compilation_cache-inl.h:131

tc::Cache::loadCacheFromProtobuf
static void loadCacheFromProtobuf(const std::string &filename)
Definition: compilation_cache-inl.h:60

tc::Cache::size
size_t size() const
Definition: compilation_cache-inl.h:85

tc::Cache::disableCache
static void disableCache()
Definition: compilation_cache-inl.h:34

tc::Cache::enableCache
static void enableCache()
Definition: compilation_cache-inl.h:29

tc::CudaCache::searchKernelImpl
static auto searchKernelImpl(C &c, const std::string &id, const MappingOptions &options, const std::vector< TensorTy > &inputs, const std::vector< TensorTy > &outputs) -> decltype(c.searchKernel(id, options, inputs, outputs))

tc::MappingOptions
Definition: mapping_options.h:336

tc::ManualCudaCache::CachedEntry
Definition: compilation_cache.h:383

tc::Cache::getCache
static std::shared_ptr< CC > getCache()
Definition: compilation_cache-inl.h:39

tc::CudaCache::CachedEntry
Definition: compilation_cache.h:123

tc::CudaGPUInfo::GetCudaDeviceStr
std::string GetCudaDeviceStr() const

tc::Cache::clear
void clear()
Definition: compilation_cache-inl.h:91

tc::OptionsCache::CachedEntry
Definition: compilation_cache.h:247