[Mlir-commits] [mlir] [mlir][SCF] Modernize `coalesceLoops` method to handle `scf.for` loops with iter_args (PR #87019)

Quinn Dawkins llvmlistbot at llvm.org
Thu Mar 28 18:25:40 PDT 2024


================
@@ -557,57 +652,151 @@ LogicalResult mlir::coalesceLoops(MutableArrayRef<scf::ForOp> loops) {
 
   // 1. Make sure all loops iterate from 0 to upperBound with step 1.  This
   // allows the following code to assume upperBound is the number of iterations.
-  for (auto loop : loops)
-    normalizeLoop(loop, outermost, innermost);
+  for (auto loop : loops) {
+    OpBuilder::InsertionGuard g(rewriter);
+    rewriter.setInsertionPoint(outermost);
+    Value lb = loop.getLowerBound();
+    Value ub = loop.getUpperBound();
+    Value step = loop.getStep();
+    auto newLoopParams = normalizeLoop(rewriter, loop.getLoc(), lb, ub, step);
+    loop.setLowerBound(newLoopParams.lowerBound);
+    loop.setUpperBound(newLoopParams.upperBound);
+    loop.setStep(newLoopParams.step);
+
+    rewriter.setInsertionPointToStart(innermost.getBody());
+    unNormalizeInductionVariable(rewriter, loop.getLoc(),
+                                 loop.getInductionVar(), lb, step);
+  }
 
   // 2. Emit code computing the upper bound of the coalesced loop as product
   // of the number of iterations of all loops.
-  OpBuilder builder(outermost);
+  OpBuilder::InsertionGuard g(rewriter);
+  rewriter.setInsertionPoint(outermost);
   Location loc = outermost.getLoc();
-  Value upperBound = outermost.getUpperBound();
-  for (auto loop : loops.drop_front())
-    upperBound =
-        builder.create<arith::MulIOp>(loc, upperBound, loop.getUpperBound());
+  SmallVector<Value> upperBounds = llvm::map_to_vector(
+      loops, [](auto loop) { return loop.getUpperBound(); });
+  Value upperBound = getProductOfIntsOrIndexes(rewriter, loc, upperBounds);
   outermost.setUpperBound(upperBound);
 
-  builder.setInsertionPointToStart(outermost.getBody());
-
-  // 3. Remap induction variables. For each original loop, the value of the
-  // induction variable can be obtained by dividing the induction variable of
-  // the linearized loop by the total number of iterations of the loops nested
-  // in it modulo the number of iterations in this loop (remove the values
-  // related to the outer loops):
-  //   iv_i = floordiv(iv_linear, product-of-loop-ranges-until-i) mod range_i.
-  // Compute these iteratively from the innermost loop by creating a "running
-  // quotient" of division by the range.
-  Value previous = outermost.getInductionVar();
-  for (unsigned i = 0, e = loops.size(); i < e; ++i) {
-    unsigned idx = loops.size() - i - 1;
-    if (i != 0)
-      previous = builder.create<arith::DivSIOp>(loc, previous,
-                                                loops[idx + 1].getUpperBound());
-
-    Value iv = (i == e - 1) ? previous
-                            : builder.create<arith::RemSIOp>(
-                                  loc, previous, loops[idx].getUpperBound());
-    replaceAllUsesInRegionWith(loops[idx].getInductionVar(), iv,
-                               loops.back().getRegion());
-  }
-
-  // 4. Move the operations from the innermost just above the second-outermost
-  // loop, delete the extra terminator and the second-outermost loop.
-  scf::ForOp second = loops[1];
-  innermost.getBody()->back().erase();
-  outermost.getBody()->getOperations().splice(
-      Block::iterator(second.getOperation()),
-      innermost.getBody()->getOperations());
-  second.erase();
+  rewriter.setInsertionPointToStart(innermost.getBody());
+  auto [delinearizeIvs, preservedUsers] = delinearizeInductionVariable(
+      rewriter, loc, outermost.getInductionVar(), upperBounds);
+  rewriter.replaceAllUsesExcept(outermost.getInductionVar(), delinearizeIvs[0],
+                                preservedUsers);
+
+  for (int i = loops.size() - 1; i > 0; --i) {
+    auto outerLoop = loops[i - 1];
+    auto innerLoop = loops[i];
+
+    rewriter.replaceAllUsesWith(innerLoop.getInductionVar(), delinearizeIvs[i]);
+    for (auto [outerLoopIterArg, innerLoopIterArg] : llvm::zip_equal(
+             outerLoop.getRegionIterArgs(), innerLoop.getRegionIterArgs())) {
+      rewriter.replaceAllUsesExcept(innerLoopIterArg, outerLoopIterArg,
+                                    preservedUsers);
+    }
+    Operation *innerTerminator = innerLoop.getBody()->getTerminator();
+    auto yieldedVals = llvm::to_vector(innerTerminator->getOperands());
+    rewriter.eraseOp(innerTerminator);
+    outerLoop.getBody()->getOperations().splice(
+        Block::iterator(innerLoop), innerLoop.getBody()->getOperations());
+    rewriter.replaceOp(innerLoop, yieldedVals);
+  }
   return success();
 }
 
+LogicalResult mlir::coalesceLoops(MutableArrayRef<scf::ForOp> loops) {
+  if (loops.empty()) {
+    return success();
----------------
qedawkins wrote:

This looks inconsistent with the variant that takes the rewriter, which returns failure if there are no loops:
```
  if (loops.size() < 2)
    return failure();
```
Either this should return failure too, or the other variant should return success when given no loops.

https://github.com/llvm/llvm-project/pull/87019


More information about the Mlir-commits mailing list