Tensor Comprehensions
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
compilation_cache.h
Go to the documentation of this file.
1 
16 #pragma once
17 
18 #include <cstdint>
19 #include <memory>
20 #include <mutex>
21 #include <stdexcept>
22 #include <string>
23 #include <vector>
24 
25 #include <dlpack/dlpack.h>
26 
27 #include <compcache.pb.h>
28 
31 
32 namespace tc {
33 
34 namespace detail {
// Metadata describing a tensor independently of its data pointer: shape,
// strides, alignment and element type.  Built either from a live DLTensor or
// deserialized from a TensorInfoProto; used as the shape component of the
// cache keys declared further below in this header.
41 struct TensorInfo {
42  std::vector<int64_t> shape;
43  std::vector<int64_t> strides;
44  uint64_t alignment;
45  DLDataType dType;
46 
47  TensorInfo(const DLTensor* t);
48  TensorInfo(const TensorInfoProto& buf);
49 
// Equality against a raw DLTensor or another TensorInfo; operator< provides
// an ordering (e.g. usable with ordered containers/algorithms).
50  bool operator==(const DLTensor* t) const;
51  bool operator==(const TensorInfo& t) const;
52  bool operator<(const TensorInfo& t) const;
// Serializes this TensorInfo to its protobuf representation.
53  TensorInfoProto toProtobuf() const;
54 };
55 } // namespace detail
56 
// CRTP base for the concrete caches below (CudaCache, OptionsCache,
// ManualCudaCache): CC is the derived cache type.  Provides the static
// enable/disable machinery, protobuf (de)serialization of a process-wide
// shared cache instance, and retrieval statistics.
57 template <typename CC>
58 class Cache {
59  public:
60  static void enableCache();
61  static void disableCache();
// Serialize the global cache to / load it from a protobuf file on disk, or
// directly from an in-memory protobuf message.
62  static void dumpCacheToProtobuf(const std::string& filename);
63  static void loadCacheFromProtobuf(const std::string& filename);
64  template <typename Protobuf>
65  static void loadCacheFromProtobuf(const Protobuf& buf);
// Access to the process-wide shared cache instance.
66  static std::shared_ptr<CC> getCache();
67  static bool cacheEnabled();
68 
69  size_t size() const;
70  void clear();
71 
// Statistics counters; mutable so const lookup paths can update them.
// NOTE(review): "Attemps" is a typo in the original identifier; it must be
// kept as-is for source compatibility with existing callers.
72  mutable int numberAttemptedRetrievals = 0;
73  mutable int numberSuccessfulRetrievals = 0;
74  mutable int numberCacheAttemps = 0;
75 
76  protected:
77  // XXX: this should be a std::shared_mutex (or boost shared_mutex) so that
// concurrent readers need not serialize on a plain std::mutex.
78  mutable std::mutex mtx_;
79 };
80 
// NOTE(review): by its name, this is raised when an insertion finds an entry
// whose key matches but whose cached values differ — confirm against the
// implementation (.cc) before relying on that in callers.
81 class CacheEntrySameKeyDifferentValue : public std::invalid_argument {
82  public:
83  explicit CacheEntrySameKeyDifferentValue(const std::string& what_arg)
84  : invalid_argument(what_arg) {}
85  explicit CacheEntrySameKeyDifferentValue(const char* what_arg)
86  : invalid_argument(what_arg) {}
87 };
88 
// Forward declaration: needed by CudaCache's removeEntriesNotInOptionsCache
// (see the member index); OptionsCache is defined further below.
89 class OptionsCache;
// Cache of generated CUDA kernel sources plus their launch configuration
// (grid/block) and specialization parameters, keyed by kernel id, mapping
// options, input/output shapes, device string and tc version.
93 class CudaCache : public Cache<CudaCache> {
94  private:
95  friend class Cache<CudaCache>;
96  using Protobuf = CudaCacheProto;
97  static std::shared_ptr<CudaCache>& getGlobalSharedCache();
98 
99  public:
// NOTE(review): the extraction dropped original line 100 here; per the member
// index it reads `struct RetrievalResult {`, and the struct also carries
// `Grid grid;` / `Block block;` members (original lines 104-105) that are
// likewise missing from this listing.
101  std::string source;
102  std::string specializedName;
103  std::vector<int> parameters;
106  };
107 
// A CachedEntry pairs a Key (id, input/output shapes, device, tc version)
// with Values (generated source, specialization, launch configuration).
123  struct CachedEntry {
124  CachedEntry(
125  const std::string& id,
126  const std::string& kernelSpecializedName,
127  const std::vector<int>& kernelParameters,
128  const Grid& grid,
129  const Block& block,
130  const MappingOptions& mappingOptions,
131  const std::vector<const DLTensor*>& inputs,
132  const std::vector<const DLTensor*>& outputs,
133  const std::string& cudaSource,
134  const std::string& deviceStr);
135 
136  CachedEntry(const CudaCacheEntryProto& buf);
137  CudaCacheEntryProto toProtobuf() const;
138 
139  struct Key {
140  std::string id;
// NOTE(review): original line 141, `MappingOptions mappingOptions;` per the
// member index, is missing from this listing.
142  std::vector<detail::TensorInfo> inputs;
143  std::vector<detail::TensorInfo> outputs;
144  std::string deviceStr;
145  std::string gitVersion;
146  };
147 
148  struct Values {
149  std::string cudaSource;
// NOTE(review): original line 150 (`std::string kernelSpecializedName;`) is
// missing here per the member index.
151  std::vector<int> kernelParameters;
// NOTE(review): original lines 152-153 (`Grid grid;` and `Block block;`) are
// missing here per the member index.
154  };
// NOTE(review): original lines 155-156 (`Key key;` and `Values values;`) are
// missing here per the member index.
157  };
158 
159  private:
160  std::vector<CachedEntry> entries_;
161 
// NOTE(review): two `CachedEntry* searchKernel(` declaration heads were
// dropped by the extraction here (around original lines 169 and 174); the
// member index confirms overloads taking detail::TensorInfo vectors and
// DLTensor* vectors respectively.
170  const std::string& id,
171  const MappingOptions& options,
172  const std::vector<detail::TensorInfo>& inputs,
173  const std::vector<detail::TensorInfo>& outputs);
175  const std::string& id,
176  const MappingOptions& options,
177  const std::vector<const DLTensor*>& inputs,
178  const std::vector<const DLTensor*>& outputs);
179  const CachedEntry* searchKernel(
180  const std::string& id,
181  const MappingOptions& options,
182  const std::vector<const DLTensor*>& inputs,
183  const std::vector<const DLTensor*>& outputs) const;
184 
185  // deduces whether C is const or non-const
186  template <typename C, typename TensorTy>
187  static auto searchKernelImpl(
188  C& c,
189  const std::string& id,
190  const MappingOptions& options,
191  const std::vector<TensorTy>& inputs,
192  const std::vector<TensorTy>& outputs)
193  -> decltype(c.searchKernel(id, options, inputs, outputs));
194 
195  public:
196  CudaCache() = default;
197  CudaCache(const CudaCacheProto& buf);
198  CudaCacheProto toProtobuf() const;
199 
// Inserts a generated kernel (source, specialized name, parameters, launch
// grid/block) under the key (id, options, input/output shapes).
205  void cacheKernel(
206  const std::string& id,
207  const MappingOptions& options,
208  const std::vector<const DLTensor*>& inputs,
209  const std::vector<const DLTensor*>& outputs,
210  const std::string& kernelSpecializedName,
211  const std::vector<int>& kernelParameters,
212  const std::string& cudaSource,
213  const Grid& grid,
214  const Block& block);
215 
// Looks up the cached kernel matching (id, options, shapes); presumably
// returns nullptr on a miss — TODO confirm against the implementation.
220  std::unique_ptr<RetrievalResult> retrieveKernel(
221  const std::string& id,
222  const MappingOptions& options,
223  const std::vector<const DLTensor*>& inputs,
224  const std::vector<const DLTensor*>& outputs) const;
225 
// NOTE(review): original line 226, which per the member index declares
// `void removeEntriesNotInOptionsCache(const OptionsCache& oc);`, is missing
// from this listing.
227 };
228 
// Cache of autotuning results: for each (id, input/output shapes, device,
// tc version) key it records MappingOptions together with the runtimes
// observed for them, supporting best-/top-k retrieval.
229 class OptionsCache : public Cache<OptionsCache> {
230  friend class Cache<OptionsCache>;
231  using Protobuf = OptionsCacheProto;
232  static std::shared_ptr<OptionsCache>& getGlobalSharedCache();
233 
234  public:
237  struct CachedEntry {
248  CachedEntry(
249  const std::string& id,
250  const std::vector<const DLTensor*>& inputs,
251  const std::vector<const DLTensor*>& outputs,
252  const std::string& deviceStr,
253  const MappingOptions& options,
254  Duration runtime);
255  CachedEntry(const OptionsCacheEntryProto& buf);
256  OptionsCacheEntryProto toProtobuf() const;
257 
258  struct Key {
259  Key(const std::string& id,
260  const std::vector<const DLTensor*>& inputs,
261  const std::vector<const DLTensor*>& outputs,
262  const std::string& deviceStr,
263  const std::string& gitVersion);
264 
265  Key(const std::string& id,
266  std::vector<detail::TensorInfo>&& inputs,
267  std::vector<detail::TensorInfo>&& outputs,
268  const std::string& deviceStr,
269  const std::string& gitVersion);
270 
271  std::string id;
272  std::vector<detail::TensorInfo> inputs;
273  std::vector<detail::TensorInfo> outputs;
274  std::string deviceStr;
275  std::string gitVersion;
276  };
277 
// One candidate options setting plus every runtime recorded for it.
278  struct Values {
279  Values(const MappingOptions& options, Duration runtime);
280  Values(const MappingOptions& options, std::vector<Duration>&& runtimes);
// NOTE(review): original line 281, `MappingOptions mappingOptions;` per the
// member index, is missing from this listing.
282  std::vector<Duration> recordedRuntimes;
283  };
// NOTE(review): original line 284 (`Key key;` per the member index) is
// missing from this listing.
285  std::vector<Values> values;
286  };
287 
288  private:
289  std::vector<CachedEntry> entries_;
290 
// NOTE(review): the `CachedEntry* searchKernel(` declaration head (around
// original line 298) was dropped by the extraction here.
299  const std::string& id,
300  const std::vector<const DLTensor*>& inputs,
301  const std::vector<const DLTensor*>& outputs);
302  const CachedEntry* searchKernel(
303  const std::string& id,
304  const std::vector<const DLTensor*>& input,
305  const std::vector<const DLTensor*>& outputs) const;
306 
307  // deduces whether C is const or non-const
308  template <typename C>
309  static auto searchKernelImpl(
310  C& c,
311  const std::string& id,
312  const std::vector<const DLTensor*>& inputs,
313  const std::vector<const DLTensor*>& outputs)
314  -> decltype(c.searchKernel(id, inputs, outputs));
315 
316  public:
317  OptionsCache() = default;
318  OptionsCache(const OptionsCacheProto& buf);
319 
// Read-only iteration over the cached entries.
320  decltype(entries_)::const_iterator begin() const;
321  decltype(entries_)::const_iterator end() const;
322 
323  OptionsCacheProto toProtobuf() const;
// NOTE(review): original lines 324-325 are missing here; per the member
// index they read `struct RetrievalResult {` and `MappingOptions options;`.
326  std::vector<Duration> recordedRuntimes;
327  };
328 
329  // Returns the sum of cache entry sizes (a single cache entry can have
330  // multiple options and profiling information associated with it).
331  size_t totalSize() const;
332 
// Appends one observed runtime for (id, options, shapes).
333  void recordRuntime(
334  const std::string& id,
335  const MappingOptions& options,
336  const std::vector<const DLTensor*>& inputs,
337  const std::vector<const DLTensor*>& outputs,
338  Duration runtime);
339 
340  std::vector<RetrievalResult> retrieveOptionsAndRuntimes(
341  const std::string& id,
342  const std::vector<const DLTensor*>& inputs,
343  const std::vector<const DLTensor*>& outputs) const;
344 
345  std::unique_ptr<MappingOptions> retrieveBestOptions(
346  const std::string& id,
347  const std::vector<const DLTensor*>& inputs,
348  const std::vector<const DLTensor*>& outputs) const;
349 
350  std::vector<MappingOptions> retrieveTopKOptions(
351  const std::string& id,
352  const std::vector<const DLTensor*>& inputs,
353  const std::vector<const DLTensor*>& outputs,
354  size_t k) const;
355 
356  // Only (up to) numberToKeep entries per operation (combination of id and
357  // input info) are kept in the cache. The best performing versions are kept.
358  void keepOnlyBestCandidates(size_t numberToKeep);
359 };
360 
361 /*
362  * ManualCudaCache stores the manually injected source of Cuda kernels
363  */
364 class ManualCudaCache : public Cache<ManualCudaCache> {
365  private:
366  friend class Cache<ManualCudaCache>;
367  using Protobuf = ManualCudaCacheProto;
368  static std::shared_ptr<ManualCudaCache>& getGlobalSharedCache();
369 
370  public:
371  /*
372  *A ManualCudaCache holds multiple CachedEntry's.
373  *Each CachedEntry is split to two conceptual parts: the key and the values.
374  *The values are:
375  * the specialized (wrt inputs) Cuda source code,
376  * the Cuda block and grid dimensions
377  *The key is:
378  * the kernel/op's unique id (string),
379  * the specialized input dimensions,
380  * the target architecture (string),
381  * tc's version (string),
382  */
383  struct CachedEntry {
384  CachedEntry(
385  const std::string& id,
386  const std::string& kernelSpecializedName,
387  const std::vector<int>& kernelParameters,
388  const Grid& grid,
389  const Block& block,
390  const std::vector<const DLTensor*>& inputs,
391  const std::vector<const DLTensor*>& outputs,
392  const std::string& cudaSource,
393  const std::string& deviceStr);
394 
395  CachedEntry(const ManualCudaCacheEntryProto& buf);
396  ManualCudaCacheEntryProto toProtobuf() const;
397 
398  struct Key {
399  std::string id;
400  std::vector<detail::TensorInfo> inputs;
401  std::vector<detail::TensorInfo> outputs;
402  std::string deviceStr;
403  std::string gitVersion;
404  };
405 
406  struct Values {
407  std::string cudaSource;
// NOTE(review): original line 408 (`std::string kernelSpecializedName;` per
// the member index) is missing from this listing.
409  std::vector<int> kernelParameters;
// NOTE(review): original lines 410-411 (`Grid grid;` and `Block block;`) are
// missing here per the member index.
412  };
// NOTE(review): original lines 413-414 (`Key key;` and `Values values;`) are
// missing here per the member index.
415  };
416 
417  private:
418  std::vector<CachedEntry> entries_;
419 
420  /*
421  *SearchKernel (through SearchKernelImpl) searches for the op in the cache;
422  *if a cached entry that corresponds to the op's TargetDevice and the
423  *shape of inputs matches, it is returned
424  */
// NOTE(review): two `CachedEntry* searchKernel(` declaration heads (around
// original lines 425 and 429) were dropped by the extraction here; the
// member index confirms both overloads.
426  const std::string& id,
427  const std::vector<detail::TensorInfo>& inputs,
428  const std::vector<detail::TensorInfo>& outputs);
430  const std::string& id,
431  const std::vector<const DLTensor*>& inputs,
432  const std::vector<const DLTensor*>& outputs);
433  const CachedEntry* searchKernel(
434  const std::string& id,
435  const std::vector<const DLTensor*>& inputs,
436  const std::vector<const DLTensor*>& outputs) const;
437 
438  // deduces whether C is const or non-const
439  template <typename C, typename InputTy>
440  static auto searchKernelImpl(
441  C& c,
442  const std::string& id,
443  const std::vector<InputTy>& inputs,
444  const std::vector<InputTy>& outputs)
445  -> decltype(c.searchKernel(id, inputs, outputs));
446 
447  public:
448  ManualCudaCache() = default;
449  ManualCudaCache(const ManualCudaCacheProto& buf);
450  ManualCudaCacheProto toProtobuf() const;
451 
452  /*
453  *Stores (cudaSource, grid, block, specializedName, parameters)
454  *in the cache with key (id, input shapes, output shapes,
455  *target device). If the key already exists in the cache,
456  *the values are replaced.
457  */
458  void cacheKernel(
459  const std::string& id,
460  const std::vector<const DLTensor*>& inputs,
461  const std::vector<const DLTensor*>& outputs,
462  const std::string& kernelSpecializedName,
463  const std::vector<int>& kernelParameters,
464  const std::string& cudaSource,
465  const Grid& grid,
466  const Block& block);
467 
468  /*
469  *Returns the cache entry that matches
470  *op(id, target device) and inputs' shapes.
471  */
472  std::unique_ptr<CudaCache::RetrievalResult> retrieveKernel(
473  const std::string& id,
474  const std::vector<const DLTensor*>& inputs,
475  const std::vector<const DLTensor*>& outputs) const;
476 };
477 
// NOTE(review): the extraction dropped original line 478 here; per the member
// index it reads `void removeFromCudaCacheEntriesNotInOptionsCache(`.
// Presumably removes CudaCache entries whose keys no longer appear in the
// given OptionsCache — confirm against the implementation.
479  CudaCache& cc,
480  const OptionsCache& oc);
481 
// NOTE(review): presumably compares raw DLTensors element-wise against cached
// TensorInfo shape metadata — confirm against the implementation.
482 bool operator==(
483  const std::vector<const DLTensor*>& inputsTensor,
484  const std::vector<detail::TensorInfo>& inputsInfo);
485 
// Derive the on-disk filenames used for the options and cuda cache dumps.
486 std::string makeOptionsFilename(const std::string& filename);
487 
488 std::string makeCudaFilename(const std::string& filename);
489 
490 } // namespace tc
std::vector< detail::TensorInfo > inputs
Definition: compilation_cache.h:400
std::string makeOptionsFilename(const std::string &filename)
CudaCacheProto Protobuf
Definition: compilation_cache.h:96
std::string id
Definition: compilation_cache.h:140
CachedEntry * searchKernel(const std::string &id, const std::vector< detail::TensorInfo > &inputs, const std::vector< detail::TensorInfo > &outputs)
Definition: compilation_cache.h:398
Definition: compilation_cache.h:258
ManualCudaCacheProto Protobuf
Definition: compilation_cache.h:367
MappingOptions options
Definition: compilation_cache.h:325
Definition: compilation_cache.h:100
static bool cacheEnabled()
Definition: compilation_cache-inl.h:80
Specializing CudaDim to differentiate between Block and Grid sizes.
Definition: mapping_options.h:208
void keepOnlyBestCandidates(size_t numberToKeep)
CachedEntry(const std::string &id, const std::string &kernelSpecializedName, const std::vector< int > &kernelParameters, const Grid &grid, const Block &block, const MappingOptions &mappingOptions, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &cudaSource, const std::string &deviceStr)
std::string kernelSpecializedName
Definition: compilation_cache.h:150
Definition: compilation_cache.h:406
void removeFromCudaCacheEntriesNotInOptionsCache(CudaCache &cc, const OptionsCache &oc)
Definition: compilation_cache.h:93
MappingOptions mappingOptions
Definition: compilation_cache.h:281
static void dumpCacheToProtobuf(const std::string &filename)
Definition: compilation_cache-inl.h:48
CachedEntry * searchKernel(const std::string &id, const MappingOptions &options, const std::vector< detail::TensorInfo > &inputs, const std::vector< detail::TensorInfo > &outputs)
OptionsCache()=default
std::string cudaSource
Definition: compilation_cache.h:149
std::string deviceStr
Definition: compilation_cache.h:402
static auto searchKernelImpl(C &c, const std::string &id, const std::vector< InputTy > &inputs, const std::vector< InputTy > &outputs) -> decltype(c.searchKernel(id, inputs, outputs))
std::vector< detail::TensorInfo > inputs
Definition: compilation_cache.h:272
Block block
Definition: compilation_cache.h:411
static auto searchKernelImpl(C &c, const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) -> decltype(c.searchKernel(id, inputs, outputs))
Definition: compilation_cache-inl.h:131
ManualCudaCacheEntryProto toProtobuf() const
void cacheKernel(const std::string &id, const MappingOptions &options, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &kernelSpecializedName, const std::vector< int > &kernelParameters, const std::string &cudaSource, const Grid &grid, const Block &block)
Definition: compilation_cache.h:81
bool operator==(const DLTensor *t) const
static void loadCacheFromProtobuf(const std::string &filename)
Definition: compilation_cache-inl.h:60
Grid grid
Definition: compilation_cache.h:104
decltype(entries_)::const_iterator end() const
OptionsCacheProto Protobuf
Definition: compilation_cache.h:231
Key key
Definition: compilation_cache.h:155
static std::shared_ptr< CudaCache > & getGlobalSharedCache()
void removeEntriesNotInOptionsCache(const OptionsCache &oc)
Block block
Definition: compilation_cache.h:105
size_t size() const
Definition: compilation_cache-inl.h:85
std::vector< detail::TensorInfo > outputs
Definition: compilation_cache.h:143
CacheEntrySameKeyDifferentValue(const std::string &what_arg)
Definition: compilation_cache.h:83
Definition: compilation_cache.h:139
Key key
Definition: compilation_cache.h:413
uint64_t alignment
Definition: compilation_cache.h:44
Grid grid
Definition: compilation_cache.h:152
static void disableCache()
Definition: compilation_cache-inl.h:34
decltype(entries_)::const_iterator begin() const
std::string specializedName
Definition: compilation_cache.h:102
Key key
Definition: compilation_cache.h:284
ManualCudaCache()=default
std::vector< Duration > recordedRuntimes
Definition: compilation_cache.h:326
std::unique_ptr< CudaCache::RetrievalResult > retrieveKernel(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) const
std::string kernelSpecializedName
Definition: compilation_cache.h:408
std::string id
Definition: compilation_cache.h:271
static void enableCache()
Definition: compilation_cache-inl.h:29
static auto searchKernelImpl(C &c, const std::string &id, const MappingOptions &options, const std::vector< TensorTy > &inputs, const std::vector< TensorTy > &outputs) -> decltype(c.searchKernel(id, options, inputs, outputs))
void cacheKernel(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &kernelSpecializedName, const std::vector< int > &kernelParameters, const std::string &cudaSource, const Grid &grid, const Block &block)
Definition: mapping_options.h:336
OptionsCacheProto toProtobuf() const
std::chrono::high_resolution_clock::duration Duration
Definition: rtc.h:31
CudaCache()=default
std::vector< int > kernelParameters
Definition: compilation_cache.h:409
std::vector< RetrievalResult > retrieveOptionsAndRuntimes(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) const
std::vector< Values > values
Definition: compilation_cache.h:285
OptionsCacheEntryProto toProtobuf() const
TensorInfo(const DLTensor *t)
size_t totalSize() const
std::vector< CachedEntry > entries_
Definition: compilation_cache.h:418
static std::shared_ptr< ManualCudaCache > & getGlobalSharedCache()
std::string gitVersion
Definition: compilation_cache.h:403
Values values
Definition: compilation_cache.h:414
int numberCacheAttemps
Definition: compilation_cache.h:74
CudaCacheEntryProto toProtobuf() const
std::unique_ptr< RetrievalResult > retrieveKernel(const std::string &id, const MappingOptions &options, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) const
CacheEntrySameKeyDifferentValue(const char *what_arg)
Definition: compilation_cache.h:85
std::string deviceStr
Definition: compilation_cache.h:144
std::vector< CachedEntry > entries_
Definition: compilation_cache.h:160
Definition: compilation_cache.h:383
bool operator==(const std::vector< const DLTensor * > &inputsTensor, const std::vector< detail::TensorInfo > &inputsInfo)
Definition: compilation_cache.h:148
static std::shared_ptr< OptionsCache > & getGlobalSharedCache()
static std::shared_ptr< CC > getCache()
Definition: compilation_cache-inl.h:39
std::mutex mtx_
Definition: compilation_cache.h:78
std::vector< detail::TensorInfo > outputs
Definition: compilation_cache.h:273
Definition: compilation_cache.h:229
std::string gitVersion
Definition: compilation_cache.h:145
std::string gitVersion
Definition: compilation_cache.h:275
std::string cudaSource
Definition: compilation_cache.h:407
std::vector< CachedEntry > entries_
Definition: compilation_cache.h:289
Specializing CudaDim to differentiate between Block and Grid sizes.
Definition: mapping_options.h:196
CachedEntry(const std::string &id, const std::string &kernelSpecializedName, const std::vector< int > &kernelParameters, const Grid &grid, const Block &block, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &cudaSource, const std::string &deviceStr)
std::string makeCudaFilename(const std::string &filename)
std::vector< Duration > recordedRuntimes
Definition: compilation_cache.h:282
DLDataType dType
Definition: compilation_cache.h:45
std::vector< detail::TensorInfo > outputs
Definition: compilation_cache.h:401
Values(const MappingOptions &options, Duration runtime)
Definition: compilation_cache.h:278
void recordRuntime(const std::string &id, const MappingOptions &options, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, Duration runtime)
Definition: compilation_cache.h:123
std::vector< int64_t > strides
Definition: compilation_cache.h:43
Definition: compilation_cache.h:324
CachedEntry(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &deviceStr, const MappingOptions &options, Duration runtime)
Key(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, const std::string &deviceStr, const std::string &gitVersion)
Block block
Definition: compilation_cache.h:153
int numberAttemptedRetrievals
Definition: compilation_cache.h:72
Values values
Definition: compilation_cache.h:156
std::unique_ptr< MappingOptions > retrieveBestOptions(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs) const
std::vector< int > parameters
Definition: compilation_cache.h:103
std::vector< detail::TensorInfo > inputs
Definition: compilation_cache.h:142
CachedEntry * searchKernel(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs)
Definition: compilation_cache.h:41
TensorInfoProto toProtobuf() const
CudaCacheProto toProtobuf() const
void clear()
Definition: compilation_cache-inl.h:91
bool operator<(const TensorInfo &t) const
int numberSuccessfulRetrievals
Definition: compilation_cache.h:73
Definition: compilation_cache.h:58
MappingOptions mappingOptions
Definition: compilation_cache.h:141
std::string deviceStr
Definition: compilation_cache.h:274
Grid grid
Definition: compilation_cache.h:410
std::vector< int > kernelParameters
Definition: compilation_cache.h:151
std::string source
Definition: compilation_cache.h:101
std::vector< MappingOptions > retrieveTopKOptions(const std::string &id, const std::vector< const DLTensor * > &inputs, const std::vector< const DLTensor * > &outputs, size_t k) const
std::vector< int64_t > shape
Definition: compilation_cache.h:42
std::string id
Definition: compilation_cache.h:399
ManualCudaCacheProto toProtobuf() const
Definition: compilation_cache.h:364
Definition: compilation_cache.h:247