[Mlir-commits] [mlir] [NFC] Simplify the tiling implementation using cloning. (PR #72178)

lorenzo chelini llvmlistbot at llvm.org
Tue Nov 14 01:16:12 PST 2023


================
@@ -169,136 +174,98 @@ static SmallVector<scf::ForOp> generateTileLoopNest(
     }
 
     auto loop = builder.create<scf::ForOp>(
-        loc, offset, size, tileSize, ValueRange{},
+        loc, offset, size, tileSize, destinationTensors,
         [&](OpBuilder &bodyBuilder, Location bodyLoc, Value iv,
             ValueRange /*iterArgs*/) {
           sizes[loopRange.index()] =
               getBoundedTileSize(bodyBuilder, bodyLoc, loopRange.value(), iv,
                                  getAsOpFoldResult(tileSize));
-          builder.create<scf::YieldOp>(loc);
         });
     offsets[loopRange.index()] = loop.getInductionVar();
     loops.push_back(loop);
-    builder.setInsertionPoint(loop.getBody()->getTerminator());
+    builder.setInsertionPointToEnd(loop.getBody());
+    destinationTensors = loop.getRegionIterArgs();
   }
-  return loops;
-}
 
-/// For a value to be yielded (`yieldedValue`) from within a loop nest `loops`,
-/// construct the destructive update pattern that inserts the yielded
-/// value into a destination tensor provided by `initValue` at offset
-/// `tileOffsets` and size `tileSizes`. For example,
-///
-/// ```mlir
-/// scf.for %iv0 = ... {
-///   %0 = tiled_op
-/// }
-/// ```
-///
-/// is transformed to
-///
-/// ```mlir
-/// scf.for %iv0 = ... iter_args(%arg = %0) {
-///   %1 = tensor.extract_slice %arg
-///   %2 = tiled_op
-///   %3 = tensor.insert_slice %2 into %arg
-///   scf.yield %3
-/// }
-/// ```
-/// TODO: This API can be cleaned up by using `SubsetExtractOpInterface`.
-static SmallVector<Value>
-yieldTiledValues(RewriterBase &rewriter, ValueRange initValues,
-                 ValueRange yieldedValues,
-                 ArrayRef<SmallVector<OpFoldResult>> tileOffsetsList,
-                 ArrayRef<SmallVector<OpFoldResult>> tileSizesList,
-                 MutableArrayRef<scf::ForOp> loops) {
-  NewYieldValuesFn yieldValueFn =
-      [&](OpBuilder &b, Location loc,
-          ArrayRef<BlockArgument> newBBArgs) -> SmallVector<Value> {
-    SmallVector<Value> inserts;
-    for (const auto &yieldedValue : llvm::enumerate(yieldedValues)) {
-      ArrayRef<OpFoldResult> tileOffsets =
-          tileOffsetsList[yieldedValue.index()];
-      ArrayRef<OpFoldResult> tileSizes = tileSizesList[yieldedValue.index()];
-      SmallVector<OpFoldResult> tileStrides(tileOffsets.size(),
-                                            b.getIndexAttr(1));
-      Value insert = b.create<tensor::InsertSliceOp>(
-          loc, yieldedValue.value(), newBBArgs[yieldedValue.index()],
-          tileOffsets, tileSizes, tileStrides);
-      inserts.push_back(insert);
-    }
-    return inserts;
-  };
-
-  SmallVector<scf::ForOp> newLoops =
-      replaceLoopNestWithNewYields(rewriter, loops, initValues, yieldValueFn,
-                                   /*replaceIterOperandsUsesInLoop =*/false);
-  for (const auto &loop : llvm::enumerate(loops)) {
-    loops[loop.index()] = newLoops[loop.index()];
+  // Add the scf.yield operations for all the outer loops.
+  for (auto [outerLoop, innerLoop] :
+       llvm::zip(MutableArrayRef(loops).drop_back(),
+                 MutableArrayRef(loops).drop_front())) {
+    builder.setInsertionPointToEnd(outerLoop.getBody());
+    builder.create<scf::YieldOp>(outerLoop.getLoc(), innerLoop.getResults());
   }
-  return llvm::to_vector(llvm::map_range(
-      loops.front().getResults().take_back(yieldedValues.size()),
-      [](OpResult r) -> Value { return r; }));
+  return loops;
 }
 
-/// If the tiled operation is destination passing style, update the
-/// slice of the destination used (which refers to the untiled destination)
-/// to use the corresponding region argument of the innermost loop.
-///
-/// ```mlir
-/// %0 =
-/// scf.for %iv0 = ... iter_args(%arg = %0) {
-///   %1 = tensor.extract_slice %0
-///   %2 = tiled_op
-///   %3 = tensor.insert_slice %2 into %arg
-///   scf.yield %3
-/// }
-/// ```
-///
-/// is transformed to
-///
-/// ```mlir
-/// scf.for %iv0 = ... iter_args(%arg = %0) {
-///   %1 = tensor.extract_slice %arg
-///   %2 = tiled_op
-///   %3 = tensor.insert_slice %2 into %arg
-///   scf.yield %3
-/// }
-/// ```
-static void
-updateDestinationOperandsForTiledOp(OpBuilder &builder,
-                                    ValueRange tiledOpDestinationValues,
-                                    ValueRange bbArgsList) {
-  for (const auto &destValue : llvm::enumerate(tiledOpDestinationValues)) {
-    auto sliceOp = destValue.value().getDefiningOp<tensor::ExtractSliceOp>();
-    if (!sliceOp)
-      continue;
-    sliceOp.setOperand(0, bbArgsList[destValue.index()]);
+/// Method to add new init values to a loop nest. Updates `loops` in-place with
+/// new loops that use the `newInitValues`.
+/// The outer-loops are updated to yield the new result values of the inner
+/// loop. For the innermost loop, the call back `getNewYields` is invoked to get
+/// the additional values to yield form the innermost loop.
+static void addInitOperandsToLoopNest(
+    RewriterBase &rewriter, MutableArrayRef<scf::ForOp> loops,
+    ValueRange newInitValues,
+    llvm::function_ref<SmallVector<Value>(RewriterBase &rewriter, Value iv,
+                                          ValueRange newRegionIterArgs)>
+        getNewYieldValsFn) {
+  SmallVector<scf::ForOp> newLoops;
+  if (loops.empty()) {
+    return;
+  }
+  OpBuilder::InsertionGuard g(rewriter);
+  rewriter.setInsertionPoint(loops.front());
+  for (auto &loop : loops) {
+    rewriter.setInsertionPoint(loop);
+
+    // Create a new loop with the new init values for this loop.
+    SmallVector<Value> newInits = llvm::to_vector(loop.getInitArgs());
+    newInits.append(newInitValues.begin(), newInitValues.end());
+    auto newLoop = rewriter.create<scf::ForOp>(
+        loop.getLoc(), loop.getLowerBound(), loop.getUpperBound(),
+        loop.getStep(), newInits,
+        [&](OpBuilder &b, Location loc, Value iv, ValueRange iterArgs) {});
----------------
chelini wrote:

I believe we can avoid the lambda here.

https://github.com/llvm/llvm-project/pull/72178


More information about the Mlir-commits mailing list