[Mlir-commits] [mlir] [SCF] allow indexing operations for loop coalesceing (PR #183180)

Nirvedh Meshram llvmlistbot at llvm.org
Wed Feb 25 09:27:43 PST 2026


https://github.com/nirvedhmeshram updated https://github.com/llvm/llvm-project/pull/183180

>From a87f1048faae3b287d134279000e6e6e636ef6da Mon Sep 17 00:00:00 2001
From: Nirvedh Meshram <nirvedh at gmail.com>
Date: Tue, 24 Feb 2026 15:46:39 -0600
Subject: [PATCH] [SCF] allow indexing operations for loop coalesceing

Currently, if there are operations between the loops, we get a dominance issue
because the delinearized index is inserted after those operations. This PR fixes that.
For testing, we also add a transform op that calls coalesceLoops directly,
because the existing op calls coalescePerfectlyNestedSCFForLoops, which does not
consider such a loop nest perfectly nested (a safer choice for that usage).
Signed-off-by: Nirvedh Meshram <nirvedh at gmail.com>

Co-Authored-By: Claude Opus 4.6 <noreply at anthropic.com>
---
 .../SCF/TransformOps/SCFTransformOps.td       | 28 +++++++++++
 .../SCF/TransformOps/SCFTransformOps.cpp      | 46 +++++++++++++++++
 mlir/lib/Dialect/SCF/Utils/Utils.cpp          |  3 +-
 .../Dialect/SCF/transform-op-coalesce.mlir    | 49 +++++++++++++++++++
 4 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td b/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td
index e2b42208f3f8e..f97225477ef8b 100644
--- a/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td
+++ b/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td
@@ -392,6 +392,34 @@ def LoopCoalesceOp : Op<Transform_Dialect, "loop.coalesce", [
   }];
 }
 
+def LoopCoalesceNestedOp : Op<Transform_Dialect, "loop.coalesce_nested", [
+  FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
+  TransformOpInterface, TransformEachOpTrait]> {
+  let summary = "Coalesces nested loops including imperfectly nested ones";
+  let description = [{
+    Given a loop, collects all nested loops (including imperfectly nested
+    loops with operations between them) and coalesces them directly using
+    coalesceLoops.
+
+    #### Return modes
+
+    The return handle points to the coalesced loop.
+  }];
+  let arguments = (ins TransformHandleTypeInterface:$target);
+  let results = (outs TransformHandleTypeInterface:$transformed);
+
+  let assemblyFormat =
+      "$target attr-dict `:` functional-type($target, $transformed)";
+
+  let extraClassDeclaration = [{
+    ::mlir::DiagnosedSilenceableFailure applyToOne(
+        ::mlir::transform::TransformRewriter &rewriter,
+        ::mlir::Operation *target,
+        ::mlir::transform::ApplyToEachResultList &results,
+        ::mlir::transform::TransformState &state);
+  }];
+}
+
 def TakeAssumedBranchOp : Op<Transform_Dialect, "scf.take_assumed_branch", [
   DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
   TransformOpInterface, TransformEachOpTrait]> {
diff --git a/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp b/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp
index 71fe9870ac170..d4e46ceb946d5 100644
--- a/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp
+++ b/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp
@@ -450,6 +450,52 @@ transform::LoopCoalesceOp::applyToOne(transform::TransformRewriter &rewriter,
   return DiagnosedSilenceableFailure::success();
 }
 
+//===----------------------------------------------------------------------===//
+// LoopCoalesceNestedOp
+//===----------------------------------------------------------------------===//
+DiagnosedSilenceableFailure transform::LoopCoalesceNestedOp::applyToOne(
+    transform::TransformRewriter &rewriter, Operation *op,
+    transform::ApplyToEachResultList &results,
+    transform::TransformState &state) {
+  auto forOp = dyn_cast<scf::ForOp>(op);
+  if (!forOp) {
+    return emitSilenceableError() << "expected scf.for operation";
+  }
+
+  // Collect the nest under `forOp`, outermost first; it may be imperfect.
+  SmallVector<scf::ForOp> nestedLoops;
+  scf::ForOp currentLoop = forOp;
+
+  while (currentLoop) {
+    nestedLoops.push_back(currentLoop);
+    Block &body = currentLoop.getRegion().front();
+
+    // Descend into the first scf.for directly in this body, if there is one.
+    scf::ForOp nextLoop = nullptr;
+    for (Operation &bodyOp : body) {
+      if (auto innerFor = dyn_cast<scf::ForOp>(&bodyOp)) {
+        nextLoop = innerFor;
+        break;
+      }
+    }
+
+    currentLoop = nextLoop;
+  }
+
+  // Coalescing needs at least two loops; otherwise fail silenceably.
+  if (nestedLoops.size() < 2) {
+    return emitSilenceableError() << "need at least 2 nested loops to coalesce";
+  }
+
+  // Coalesce the whole nest at once; the handle maps to the outermost loop.
+  if (failed(coalesceLoops(rewriter, nestedLoops))) {
+    return emitSilenceableError() << "failed to coalesce nested loops";
+  }
+
+  results.push_back(nestedLoops.front());
+  return DiagnosedSilenceableFailure::success();
+}
+
 //===----------------------------------------------------------------------===//
 // TakeAssumedBranchOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
index 9f569ea05967c..2dd7df09e0f22 100644
--- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
@@ -957,7 +957,8 @@ LogicalResult mlir::coalesceLoops(RewriterBase &rewriter,
   Value upperBound = getProductOfIntsOrIndexes(rewriter, loc, upperBounds);
   outermost.setUpperBound(upperBound);
 
-  rewriter.setInsertionPointToStart(innermost.getBody());
+  // Insert delinearization at the start of the outermost loop body.
+  rewriter.setInsertionPointToStart(outermost.getBody());
   auto [delinearizeIvs, preservedUsers] = delinearizeInductionVariable(
       rewriter, loc, outermost.getInductionVar(), upperBounds);
   rewriter.replaceAllUsesExcept(outermost.getInductionVar(), delinearizeIvs[0],
diff --git a/mlir/test/Dialect/SCF/transform-op-coalesce.mlir b/mlir/test/Dialect/SCF/transform-op-coalesce.mlir
index 03ddee1c7a98a..467020e331295 100644
--- a/mlir/test/Dialect/SCF/transform-op-coalesce.mlir
+++ b/mlir/test/Dialect/SCF/transform-op-coalesce.mlir
@@ -365,3 +365,52 @@ module attributes {transform.with_named_sequence} {
 //   CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
 //       CHECK:   scf.for %[[IV:.+]] = %[[C0]] to %[[ARG1]] step %[[C1]]
 //       CHECK:     "some_use"(%{{[a-zA-Z0-9]+}}, %[[C0]], %[[C0]], %[[IV]])
+
+// -----
+
+// Loops with operations between them that use the outer loop's induction variable.
+// The delinearization should be inserted early enough that these operations can
+// use the delinearized IV.
+
+func.func @opsinbetween_nested_loops(%init: f32) -> f32 {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c3 = arith.constant 3 : index
+  %c5 = arith.constant 5 : index
+
+  %result = scf.for %i = %c0 to %c3 step %c1 iter_args(%outer = %init) -> (f32) {
+    %computed = arith.addi %i, %c5 : index
+    %used = arith.muli %computed, %c3 : index
+
+    %inner_result = scf.for %j = %c0 to %c3 step %c1 iter_args(%inner = %outer) -> (f32) {
+      %updated = "use"(%inner, %i, %j, %computed, %used) : (f32, index, index, index, index) -> f32
+      scf.yield %updated : f32
+    }
+    scf.yield %inner_result : f32
+  } {coalesce_nested}
+  return %result : f32
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce_nested} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for">
+    %2 = transform.loop.coalesce_nested %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
+    transform.yield
+  }
+}
+// CHECK-LABEL: func @opsinbetween_nested_loops
+//  CHECK-SAME:     %[[INIT:.+]]: f32
+//   CHECK-DAG:   %[[C3:.+]] = arith.constant 3 : index
+//   CHECK-DAG:   %[[C5:.+]] = arith.constant 5 : index
+//   CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
+//   CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
+//   CHECK-DAG:   %[[C9:.+]] = arith.constant 9 : index
+//       CHECK:   %[[RESULT:.+]] = scf.for %[[IV:.+]] = %[[C0]] to %[[C9]] step %[[C1]] iter_args(%[[ARG:.+]] = %[[INIT]]) -> (f32) {
+//       CHECK:     %[[DELIN:.+]]:2 = affine.delinearize_index %[[IV]] into (3, 3)
+//       CHECK:     %[[COMPUTED:.+]] = arith.addi %[[DELIN]]#0, %[[C5]]
+//       CHECK:     %[[USED:.+]] = arith.muli %[[COMPUTED]], %[[C3]]
+//       CHECK:     %[[UPDATED:.+]] = "use"(%[[ARG]], %[[DELIN]]#0, %[[DELIN]]#1, %[[COMPUTED]], %[[USED]])
+//       CHECK:     scf.yield %[[UPDATED]]
+//       CHECK:   }
+//       CHECK:   return %[[RESULT]]



More information about the Mlir-commits mailing list