Tensor Comprehensions
mapped_scop.h
Go to the documentation of this file.
#pragma once

#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "tc/core/utils/dlpack.h"
#include "tc/external/isl.h"

namespace tc {
namespace polyhedral {

// Scop associated with fixed block and grid dimensions.
//
// Different branches of the schedule tree may be mapped to GPU blocks or
// threads. The role of this class is to ensure that the number of required
// blocks and threads is consistent for the entire Scop. It does so by
// requiring the grid and block configuration to be provided when an
// instance is constructed. Different parts of the schedule tree may be
// mapped to blocks and threads, but the values remain those specified at
// construction. If fewer blocks or threads are necessary to execute certain
// parts of the Scop, the block or thread dimensions will be further
// restricted locally in a specific branch of the schedule tree.
//
// Two invariants must be preserved:
// 1. All paths from the schedule tree root to its leaves must have exactly
// the same number of block and thread mappings. Code generation will fail
// if this is not the case (TODO: automatically map to 1 thread and 1 block
// instead).
// 2. Mapping to each block and thread must appear exactly once on each path
// from the schedule tree root to its leaves. Mapping will fail if this
// invariant is violated.
//
// Only const and copy accessors to the members of the original Scop are
// exposed, since mapping to blocks and threads introduces schedule tree
// elements that are incompatible with other Scop modifications.
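//
// A minimal construction sketch, illustrative only: "scopUPtr" and
// "mappingOptions" are placeholders for a Scop and MappingOptions created
// elsewhere from a TC definition.
//
//   auto mscop = MappedScop::makeWithOuterBlockInnerThreadStrategy(
//       std::move(scopUPtr), mappingOptions);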
class MappedScop {
 private:
  MappedScop(
      std::unique_ptr<Scop>&& scop,
      ::tc::Grid grid,
      ::tc::Block block,
      uint64_t unroll_)
      : scop_(std::move(scop)),
        numBlocks(grid),
        numThreads(block),
        unroll(unroll_) {}

 public:
  static inline std::unique_ptr<MappedScop> makeOneBlockOneThread(
      std::unique_ptr<Scop>&& scop) {
    return std::unique_ptr<MappedScop>(new MappedScop(
        std::move(scop), ::tc::Grid{1, 1, 1}, ::tc::Block{1, 1, 1}, 1));
  }
  static inline std::unique_ptr<MappedScop> makeMappedScop(
      std::unique_ptr<Scop>&& scop,
      ::tc::Grid grid,
      ::tc::Block block,
      uint64_t unroll) {
    return std::unique_ptr<MappedScop>(
        new MappedScop(std::move(scop), grid, block, unroll));
  }
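  // For illustration, a hedged sketch of constructing a MappedScop with an
  // explicit launch configuration; the grid/block sizes and the unroll
  // factor below are arbitrary example values.
  //
  //   auto mscop = MappedScop::makeMappedScop(
  //       std::move(scop), ::tc::Grid{256, 1, 1}, ::tc::Block{32, 8, 1}, 1);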

  // Apply the hand-written OuterBlockInnerThread mapping strategy.
  static std::unique_ptr<MappedScop> makeWithOuterBlockInnerThreadStrategy(
      std::unique_ptr<Scop>&& scopUPtr,
      const MappingOptions& mappingOptions);

  // Map the "pos"-th dimension of the _band_ node identified by "tree" to
  // the given block or thread dimension. Ancestors or descendants of "tree"
  // must not have a dimension already mapped to the same block or thread.
  inline detail::ScheduleTree*
  map(detail::ScheduleTree* tree, int pos, const mapping::BlockId& id) {
    return mapToParameterWithExtent(
        scop_->scheduleRoot(), tree, pos, id, id.mappingSize(numBlocks));
  }
  inline detail::ScheduleTree*
  map(detail::ScheduleTree* tree, int pos, const mapping::ThreadId& id) {
    return mapToParameterWithExtent(
        scop_->scheduleRoot(), tree, pos, id, id.mappingSize(numThreads));
  }
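  // Illustrative sketch only; it assumes "band" points at a band node below
  // the schedule root and that mapping::BlockId exposes x/y/z factory
  // functions (e.g. mapping::BlockId::x()), which is an assumption here.
  //
  //   map(band, 0, mapping::BlockId::x()); // outermost member -> blockIdx.x
  //   map(band, 1, mapping::BlockId::y()); // next member      -> blockIdx.y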

  // Given that "nMapped" identifiers of type "MappingTypeId" have already
  // been mapped, map the remaining ones (up to "nToMap") to zero
  // for all statement instances.
  template <typename MappingTypeId>
  void mapRemaining(detail::ScheduleTree* tree, size_t nMapped, size_t nToMap);

  // Fix the values of the specified parameters in the context
  // to the corresponding specified values.
  template <typename T>
  void fixParameters(const std::unordered_map<std::string, T>& sizes) {
    scop_->fixParameters(sizes);
  }
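  // A hedged example: the parameter names "N" and "K" are placeholders for
  // whatever symbolic sizes appear in the TC definition.
  //
  //   mappedScop->fixParameters<int>({{"N", 1024}, {"K", 256}});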

  // Insert a context node for the block and thread identifiers.
  void insertMappingContext();

  // Generate CUDA code for the current state of the transformation, given a
  // name for the generated function.
  std::tuple<std::string, tc::Grid, tc::Block> codegen(
      const std::string& specializedName) const;
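  // A hedged usage sketch: codegen() returns the CUDA source together with
  // the launch configuration that is actually required; "my_kernel" is a
  // placeholder name.
  //
  //   auto compiled = codegen("my_kernel");
  //   const std::string& cudaSource = std::get<0>(compiled);
  //   const tc::Grid& grid = std::get<1>(compiled);
  //   const tc::Block& block = std::get<2>(compiled);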

  // Accessors.
  // Const accessor to the schedule of the underlying Scop.
  inline const detail::ScheduleTree* schedule() const {
    return scop_->scheduleRoot();
  }
  // Reference to the underlying Scop; no ownership transfer intended.
  inline const Scop& scop() const {
    return *scop_;
  }
  inline Scop& scop() {
    return *scop_;
  }

 private:
  // Map "band" to block identifiers and then scale
  // the band members by "tileSizes".
  void mapToBlocksAndScaleBand(
      detail::ScheduleTree* band,
      std::vector<size_t> tileSizes);
  // Look for innermost reduction band members.
  // Store them in reductionBandUpdates_ and their parents
  // in reductionFromParent_. Return true if any were found.
  bool detectReductions(detail::ScheduleTree* band);
  // Does separateReduction need to be called on this node?
  bool needReductionSeparation(const detail::ScheduleTree* st);
  // Return the schedule that will be used by mapInnermostBandsToThreads
  // for mapping to thread identifiers, with the last function
  // corresponding to thread identifier x.
  isl::multi_union_pw_aff reductionMapSchedule(const detail::ScheduleTree* st);
  // Separate out reductions that can be mapped to an entire block.
  // The remaining parts, if any, are no longer considered for replacement
  // by a library call.
  detail::ScheduleTree* separateReduction(detail::ScheduleTree* band);
  // Map "band" to thread identifiers, assuming "nInner" thread identifiers
  // have already been used; use as many of the remaining blockSizes values
  // as there are outer coincident dimensions, unroll band members that
  // execute at most "unroll" instances (if nInner == 0), and return the
  // updated number of mapped thread identifiers.
  size_t mapToThreads(detail::ScheduleTree* band, size_t nInner);
  // Map innermost bands to thread identifiers and
  // return the number of mapped thread identifiers.
  size_t mapInnermostBandsToThreads(detail::ScheduleTree* st);

 private:
  std::unique_ptr<Scop> scop_;

 public:
  const ::tc::Grid numBlocks;
  const ::tc::Block numThreads;
  const uint64_t unroll;

  // The schedule depth that was mapped to Thread::x for specific parts of
  // the domain.
  // XXX: this is partially redundant state, as the information can
  // potentially be extracted from the schedule tree; however, until we get
  // a first-class MappingNode, doing so requires some dirty hacks.
  ThreadIdxxScheduleDepthState threadIdxxScheduleDepthState;

 private:
  // Information about a detected reduction that can potentially
  // be mapped to a library call.
  struct Reduction {
    Reduction(std::vector<isl::id> ids) : ids(ids), separated(false) {}
    // The statement identifiers of the reduction update statements.
    std::vector<isl::id> ids;
    // Has the reduction been separated out as a full block?
    bool separated;
  };
  // Maps the parent band of a reduction band to the reduction band.
  // As a special case, the parent band may be missing, in which case
  // the reduction band is mapped to itself.
  std::unordered_map<const detail::ScheduleTree*, const detail::ScheduleTree*>
      reductionFromParent_;
  // Map isolated innermost reduction band members to information
  // about the detected reduction.
  std::map<const detail::ScheduleTree*, Reduction> reductionBandUpdates_;
};
} // namespace polyhedral
} // namespace tc