[Mlir-commits] [mlir] [mlir][TilingInterface] Use `LoopLikeOpInterface` in tiling using SCF to unify tiling with `scf.for` and `scf.forall`. (PR #77874)

lorenzo chelini llvmlistbot at llvm.org
Fri Jan 12 07:42:19 PST 2024


================
@@ -135,93 +111,231 @@ static Operation *cloneOpAndUpdateDestinationArgs(RewriterBase &rewriter,
   return clonedOp;
 }
 
-/// Generate an empty loop nest that represents the tiled loop nest shell.
+/// Type of the call back function used to generate the body of the tiled
+/// loops. The loop generation methods use this callback to generate
+/// the body of the inner-most tile loop. The call back is provided with
+/// - `rewriter`: with insertion point set to the end of the inner most loop
+///    body
+/// - `ivs`: the induction variables for the surrounding loops.
+/// - `regionIterArgs`: the basic block arguments of the inner most loop that
+///   correspond to the init/result of the loop.
+/// - `resultOffsets/Sizes`: The call back returns the result of tiling the
+///   operation, but in `resultOffsets` and `resultSizes` the callback function
+///   is expected to populate the offsets/sizes to use while inserting the
+///   result back into the corresponding `regionIterArgs`. These values
+///   are used by the loop generation methods to create the appropriate yield
+///   values from the inner most loop.
+/// The call back returns the `TilingResult` obtained from tiling the operation.
+using TileLoopBodyFn = std::function<FailureOr<TilingResult>(
+    RewriterBase &rewriter, Location loc, ValueRange ivs,
+    ValueRange regionIterArgs,
+    SmallVector<SmallVector<OpFoldResult>> &resultOffsets,
+    SmallVector<SmallVector<OpFoldResult>> &resultSizes)>;
+
+/// Generate the tile-loop nest using `scf.for` operation.
 /// - `loopRanges` specifies the lb, ub and step of the untiled iteration space.
 /// - `tileSizes` is the tile sizes to use. Zero represent untiled loops.
-/// - In `offsets` and `sizes` return the multi-dimensional offset and size of
-///   the tile processed within the inner most loop.
-/// Note that this methods adds `scf.yield` operation for all but the innermost
-/// loop. These yield the value returned by the immediately inner loop. The
-/// caller is expected to add the scf.yield operation for the innermost loop.
-static SmallVector<scf::ForOp> generateTileLoopNest(
-    OpBuilder &builder, Location loc, ArrayRef<Range> loopRanges,
-    ArrayRef<OpFoldResult> tileSizes, SmallVector<OpFoldResult> &offsets,
-    SmallVector<OpFoldResult> &sizes, ValueRange destinationTensors = {}) {
-  if (loopRanges.empty())
-    return {};
+/// - `destinationTensors` are the init values to use for the outer most loop.
+/// - `tileLoopBodyFn` is called to generate the loop body of the inner most
+///    loop.
+/// - `loops` is an in-out parameter into which the generated loops are
+///    populated.
+/// The `TilingResult` returned by calling `tileLoopBodyFn` is returned back
+/// to the caller.
+static FailureOr<TilingResult> generateLoopNestUsingForOp(
+    RewriterBase &rewriter, Location loc, ArrayRef<Range> loopRanges,
+    ArrayRef<OpFoldResult> tileSizes, ValueRange destinationTensors,
+    TileLoopBodyFn tileLoopBodyFn, SmallVector<LoopLikeOpInterface> &loops) {
+  assert(!loopRanges.empty() && "unexpected empty loop ranges");
   assert(loopRanges.size() == tileSizes.size() &&
          "expected as many tile sizes as loop ranges");
-  OpBuilder::InsertionGuard guard(builder);
-  SmallVector<scf::ForOp> loops;
-  offsets.resize(loopRanges.size());
-  sizes.resize(loopRanges.size());
-
-  for (auto loopRange : llvm::enumerate(loopRanges)) {
-    Value offset =
-        getValueOrCreateConstantIndexOp(builder, loc, loopRange.value().offset);
-    Value size =
-        getValueOrCreateConstantIndexOp(builder, loc, loopRange.value().size);
-    Value tileSize = getValueOrCreateConstantIndexOp(
-        builder, loc, tileSizes[loopRange.index()]);
+  OpBuilder::InsertionGuard guard(rewriter);
+  SmallVector<Value> ivs;
+
+  for (auto [loopRange, tileSize] : llvm::zip_equal(loopRanges, tileSizes)) {
     // No loops if tile size is zero. Set offset and size to the loop
     // offset and size.
-    if (matchPattern(tileSize, m_Zero())) {
-      offsets[loopRange.index()] = offset;
-      sizes[loopRange.index()] = size;
+    if (isConstantIntValue(tileSize, 0)) {
       continue;
     }
 
-    auto loop = builder.create<scf::ForOp>(
-        loc, offset, size, tileSize, destinationTensors,
-        [&](OpBuilder &bodyBuilder, Location bodyLoc, Value iv,
-            ValueRange /*iterArgs*/) {
-          sizes[loopRange.index()] =
-              getBoundedTileSize(bodyBuilder, bodyLoc, loopRange.value(), iv,
-                                 getAsOpFoldResult(tileSize));
-        });
-    offsets[loopRange.index()] = loop.getInductionVar();
+    Value lb = getValueOrCreateConstantIndexOp(rewriter, loc, loopRange.offset);
+    Value ub = getValueOrCreateConstantIndexOp(rewriter, loc, loopRange.size);
+    Value step = getValueOrCreateConstantIndexOp(rewriter, loc, tileSize);
+    auto loop =
+        rewriter.create<scf::ForOp>(loc, lb, ub, step, destinationTensors,
+                                    [](OpBuilder &bodyBuilder, Location bodyLoc,
+                                       Value iv, ValueRange /*iterArgs*/) {});
     loops.push_back(loop);
-    builder.setInsertionPointToEnd(loop.getBody());
+    ivs.push_back(loop.getInductionVar());
+    rewriter.setInsertionPointToEnd(loop.getBody());
     destinationTensors = loop.getRegionIterArgs();
   }
 
+  SmallVector<SmallVector<OpFoldResult>> resultOffsets, resultSizes;
+  FailureOr<TilingResult> tilingResult = tileLoopBodyFn(
+      rewriter, loc, ivs, destinationTensors, resultOffsets, resultSizes);
+  if (failed(tilingResult)) {
+    return rewriter.notifyMatchFailure(
+        loc, "failed to generate inner tile loop body");
+  }
+  if (loops.empty()) {
+    return tilingResult;
+  }
+
+  // 6. Yield all the results of the tiled operation.
+  SmallVector<Value> yieldedValues;
+  for (auto [tiledValue, destinationTensor, resultOffset, resultSize] :
+       llvm::zip_equal(tilingResult->tiledValues, destinationTensors,
+                       resultOffsets, resultSizes)) {
+    SmallVector<OpFoldResult> resultStride(resultOffset.size(),
+                                           rewriter.getIndexAttr(1));
+    auto insertSlice = rewriter.create<tensor::InsertSliceOp>(
+        loc, tiledValue, destinationTensor, resultOffset, resultSize,
+        resultStride);
+    yieldedValues.push_back(insertSlice);
+  }
+  rewriter.create<scf::YieldOp>(loc, yieldedValues);
+
   // Add the scf.yield operations for all the outer loops.
-  if (!loops.empty()) {
-    for (auto [outerLoop, innerLoop] :
-         llvm::zip_equal(MutableArrayRef(loops).drop_back(),
-                         MutableArrayRef(loops).drop_front())) {
-      builder.setInsertionPointToEnd(outerLoop.getBody());
-      builder.create<scf::YieldOp>(outerLoop.getLoc(), innerLoop.getResults());
-    }
+  for (auto [outerLoop, innerLoop] :
+       llvm::zip_equal(MutableArrayRef(loops).drop_back(),
+                       MutableArrayRef(loops).drop_front())) {
+    rewriter.setInsertionPointToEnd(
+        cast<scf::ForOp>(outerLoop.getOperation()).getBody());
+    rewriter.create<scf::YieldOp>(outerLoop.getLoc(), innerLoop->getResults());
   }
-  return loops;
+  return tilingResult;
+}
+
+/// Generate the tile-loop nest using `scf.forall` operation.
+/// - `loopRanges` specifies the lb, ub and step of the untiled iteration space.
+/// - `tileSizes` is the tile sizes to use. Zero represent untiled loops.
+/// - `destinationTensors` are the init values to use for the outer most loop.
+/// - `mappingVector` is the mapping attributes to use for loop construction.
+///   Can be empty.
+/// - `tiledBodyFn` is called to generate the loop body of the inner most
+///    loop.
+/// - `loops` is an in-out parameter into which the generated loops are
+///    populated.
+/// The `TilingResult` returned by calling `tiledBodyFn` is returned back
+/// to the caller.
+static FailureOr<TilingResult> generateLoopNestUsingForallOp(
+    RewriterBase &rewriter, Location loc, ArrayRef<Range> loopRanges,
+    ArrayRef<OpFoldResult> tileSizes, ArrayRef<Attribute> mappingVector,
+    ValueRange destinationTensors, TileLoopBodyFn tiledBodyFn,
+    SmallVector<LoopLikeOpInterface> &loops) {
+  SmallVector<OpFoldResult> lbs, ubs, steps;
+  assert(!loopRanges.empty() && "unexpected empty loop ranges");
+  assert(loopRanges.size() == tileSizes.size() &&
+         "expected as many tile sizes as loop ranges");
+  OpBuilder::InsertionGuard guard(rewriter);
+  SmallVector<OpFoldResult> offsets(loopRanges.size()),
+      sizes(loopRanges.size());
+
+  for (auto [tileSize, loopRange] : llvm::zip(tileSizes, loopRanges)) {
+    if (isConstantIntValue(tileSize, 0))
+      continue;
+    lbs.push_back(loopRange.offset);
+    ubs.push_back(loopRange.size);
+    steps.push_back(tileSize);
+  }
+  assert(!lbs.empty() && "Expected at least one loop range");
+
+  std::optional<ArrayAttr> mappingAttr;
+  if (!mappingVector.empty()) {
----------------
chelini wrote:

Nit: please drop the braces here and a bit below (line 261).

https://github.com/llvm/llvm-project/pull/77874


More information about the Mlir-commits mailing list