[Mlir-commits] [mlir] [SCF] allow indexing operations for loop coalescing (PR #183180)
Nirvedh Meshram
llvmlistbot at llvm.org
Wed Feb 25 09:27:43 PST 2026
https://github.com/nirvedhmeshram updated https://github.com/llvm/llvm-project/pull/183180
>From a87f1048faae3b287d134279000e6e6e636ef6da Mon Sep 17 00:00:00 2001
From: Nirvedh Meshram <nirvedh at gmail.com>
Date: Tue, 24 Feb 2026 15:46:39 -0600
Subject: [PATCH] [SCF] allow indexing operations for loop coalescing
Currently, if there are operations between the loops, coalescing produces a
dominance violation because the delinearized index is inserted after those
operations. This PR fixes that by inserting the delinearization at the start
of the outermost loop body.
For testing, we also add a transform op that calls coalesceLoops directly,
because the existing op goes through coalescePerfectlyNestedSCFForLoops, which
does not treat such a loop nest as perfectly nested (the safer choice for that
usage).
Signed-off-by: Nirvedh Meshram <nirvedh at gmail.com>
Co-Authored-By: Claude Opus 4.6 <noreply at anthropic.com>
---
.../SCF/TransformOps/SCFTransformOps.td | 28 +++++++++++
.../SCF/TransformOps/SCFTransformOps.cpp | 46 +++++++++++++++++
mlir/lib/Dialect/SCF/Utils/Utils.cpp | 3 +-
.../Dialect/SCF/transform-op-coalesce.mlir | 49 +++++++++++++++++++
4 files changed, 125 insertions(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td b/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td
index e2b42208f3f8e..f97225477ef8b 100644
--- a/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td
+++ b/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td
@@ -392,6 +392,34 @@ def LoopCoalesceOp : Op<Transform_Dialect, "loop.coalesce", [
}];
}
+def LoopCoalesceNestedOp : Op<Transform_Dialect, "loop.coalesce_nested", [
+    FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
+    TransformOpInterface, TransformEachOpTrait]> {
+  let summary = "Coalesces nested loops including imperfectly nested ones";
+  let description = [{
+    Given an `scf.for` loop, walks down the loop nest by following the first
+    `scf.for` found in each loop body and coalesces the collected loops into a
+    single loop by calling `coalesceLoops` directly. Unlike
+    `transform.loop.coalesce`, the nest does not have to be perfectly nested:
+    other operations may appear between the loops.
+
+    Operations located between the loops end up in the body of the coalesced
+    loop and therefore run on every iteration of it. This op does not check
+    that doing so preserves semantics; that is the caller's responsibility.
+
+    #### Return modes
+
+    Produces a silenceable failure if the target is not an `scf.for`, if fewer
+    than two nested loops are found, or if `coalesceLoops` fails. Otherwise,
+    the return handle points to the coalesced loop.
+  }];
+  // The payload op to start from; `applyToOne` rejects anything but `scf.for`.
+  let arguments = (ins TransformHandleTypeInterface:$target);
+  let results = (outs TransformHandleTypeInterface:$transformed);
+
+  let assemblyFormat =
+      "$target attr-dict `:` functional-type($target, $transformed)";
+
+  let extraClassDeclaration = [{
+    ::mlir::DiagnosedSilenceableFailure applyToOne(
+        ::mlir::transform::TransformRewriter &rewriter,
+        ::mlir::Operation *target,
+        ::mlir::transform::ApplyToEachResultList &results,
+        ::mlir::transform::TransformState &state);
+  }];
+}
+
def TakeAssumedBranchOp : Op<Transform_Dialect, "scf.take_assumed_branch", [
DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
TransformOpInterface, TransformEachOpTrait]> {
diff --git a/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp b/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp
index 71fe9870ac170..d4e46ceb946d5 100644
--- a/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp
+++ b/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp
@@ -450,6 +450,52 @@ transform::LoopCoalesceOp::applyToOne(transform::TransformRewriter &rewriter,
return DiagnosedSilenceableFailure::success();
}
+//===----------------------------------------------------------------------===//
+// LoopCoalesceNestedOp
+//===----------------------------------------------------------------------===//
+/// Coalesces the chain of nested `scf.for` loops rooted at `op` into one loop.
+///
+/// The chain starts at `op` and is extended with the first `scf.for` found in
+/// the body of the most recently collected loop, so other operations may sit
+/// between the loops. Each inner loop is validated before it is added:
+///   * its lower bound, upper bound and step must be defined outside of the
+///     outermost loop, because the combined upper bound is set on the
+///     outermost loop and therefore has to dominate it;
+///   * its iter_args must be initialized with the region iter_args of the
+///     enclosing loop, and the enclosing loop must yield its results, so that
+///     the loop-carried values form a single chain through the nest.
+/// NOTE(review): the iter_args requirements mirror what `coalesceLoops` is
+/// believed to assume; confirm against its implementation.
+///
+/// Returns a silenceable failure if `op` is not an `scf.for`, if a nested loop
+/// violates the requirements above, if fewer than two loops are found, or if
+/// `coalesceLoops` fails. On success, `results` receives the outermost loop,
+/// which now is the coalesced loop.
+DiagnosedSilenceableFailure transform::LoopCoalesceNestedOp::applyToOne(
+    transform::TransformRewriter &rewriter, Operation *op,
+    transform::ApplyToEachResultList &results,
+    transform::TransformState &state) {
+  auto forOp = dyn_cast<scf::ForOp>(op);
+  if (!forOp)
+    return emitSilenceableError() << "expected scf.for operation";
+
+  // Collect the nested loops (including imperfectly nested ones).
+  SmallVector<scf::ForOp> nestedLoops = {forOp};
+  while (true) {
+    scf::ForOp outer = nestedLoops.back();
+
+    // Only the first loop in the body is followed; sibling loops are ignored.
+    auto innerLoops = outer.getBody()->getOps<scf::ForOp>();
+    if (innerLoops.empty())
+      break;
+    scf::ForOp inner = *innerLoops.begin();
+
+    // Bounds computed inside the nest cannot be hoisted in front of the
+    // outermost loop, so reject them instead of creating invalid IR.
+    SmallVector<Value> innerBounds = {inner.getLowerBound(),
+                                      inner.getUpperBound(), inner.getStep()};
+    for (Value bound : innerBounds) {
+      if (!forOp.isDefinedOutsideOfLoop(bound))
+        return emitSilenceableError()
+               << "inner loop bounds must be defined outside of the "
+                  "outermost loop";
+    }
+
+    // The loop-carried values must be threaded straight through the nest.
+    if (!llvm::equal(outer.getRegionIterArgs(), inner.getInitArgs()))
+      return emitSilenceableError()
+             << "inner loop iter_args must be initialized with the iter_args "
+                "of the enclosing loop";
+    if (!llvm::equal(inner.getResults(),
+                     outer.getBody()->getTerminator()->getOperands()))
+      return emitSilenceableError()
+             << "enclosing loop must yield the results of the inner loop";
+
+    nestedLoops.push_back(inner);
+  }
+
+  // Need at least 2 loops to coalesce.
+  if (nestedLoops.size() < 2)
+    return emitSilenceableError() << "need at least 2 nested loops to coalesce";
+
+  // Call coalesceLoops directly, bypassing the perfect-nest analysis.
+  if (failed(coalesceLoops(rewriter, nestedLoops)))
+    return emitSilenceableError() << "failed to coalesce nested loops";
+
+  results.push_back(nestedLoops.front());
+  return DiagnosedSilenceableFailure::success();
+}
+
//===----------------------------------------------------------------------===//
// TakeAssumedBranchOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
index 9f569ea05967c..2dd7df09e0f22 100644
--- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
@@ -957,7 +957,8 @@ LogicalResult mlir::coalesceLoops(RewriterBase &rewriter,
Value upperBound = getProductOfIntsOrIndexes(rewriter, loc, upperBounds);
outermost.setUpperBound(upperBound);
- rewriter.setInsertionPointToStart(innermost.getBody());
+ // Insert delinearization at the start of the outermost loop body.
+ rewriter.setInsertionPointToStart(outermost.getBody());
auto [delinearizeIvs, preservedUsers] = delinearizeInductionVariable(
rewriter, loc, outermost.getInductionVar(), upperBounds);
rewriter.replaceAllUsesExcept(outermost.getInductionVar(), delinearizeIvs[0],
diff --git a/mlir/test/Dialect/SCF/transform-op-coalesce.mlir b/mlir/test/Dialect/SCF/transform-op-coalesce.mlir
index 03ddee1c7a98a..467020e331295 100644
--- a/mlir/test/Dialect/SCF/transform-op-coalesce.mlir
+++ b/mlir/test/Dialect/SCF/transform-op-coalesce.mlir
@@ -365,3 +365,52 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK: scf.for %[[IV:.+]] = %[[C0]] to %[[ARG1]] step %[[C1]]
// CHECK: "some_use"(%{{[a-zA-Z0-9]+}}, %[[C0]], %[[C0]], %[[IV]])
+
+// -----
+
+// Loops with operations between them that use the outer loop's induction variable.
+// The delinearization should be inserted early enough that these operations can
+// use the delinearized IV.
+
+// Two-level loop nest that is not perfectly nested: the outer body contains
+// index arithmetic on the outer induction variable ahead of the inner loop.
+func.func @opsinbetween_nested_loops(%init: f32) -> f32 {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c3 = arith.constant 3 : index
+ %c5 = arith.constant 5 : index
+
+ %result = scf.for %i = %c0 to %c3 step %c1 iter_args(%outer = %init) -> (f32) {
+ // Operations between the two loops; both depend on the outer IV %i.
+ %computed = arith.addi %i, %c5 : index
+ %used = arith.muli %computed, %c3 : index
+
+ // The inner loop consumes both IVs and the values computed in between.
+ %inner_result = scf.for %j = %c0 to %c3 step %c1 iter_args(%inner = %outer) -> (f32) {
+ %updated = "use"(%inner, %i, %j, %computed, %used) : (f32, index, index, index, index) -> f32
+ scf.yield %updated : f32
+ }
+ scf.yield %inner_result : f32
+ // The `coalesce_nested` attribute is what the transform sequence matches on.
+ } {coalesce_nested}
+ return %result : f32
+}
+
+// Transform script: selects the scf.for tagged with `coalesce_nested` and
+// applies transform.loop.coalesce_nested to it.
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ // Match the tagged outer loop.
+ %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce_nested} in %arg1 : (!transform.any_op) -> !transform.any_op
+ // Narrow the handle type to scf.for before coalescing.
+ %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for">
+ %2 = transform.loop.coalesce_nested %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
+ transform.yield
+ }
+}
+// CHECK-LABEL: func @opsinbetween_nested_loops
+// CHECK-SAME: %[[INIT:.+]]: f32
+// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index
+// CHECK-DAG: %[[C5:.+]] = arith.constant 5 : index
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
+// CHECK-DAG: %[[C9:.+]] = arith.constant 9 : index
+// CHECK: %[[RESULT:.+]] = scf.for %[[IV:.+]] = %[[C0]] to %[[C9]] step %[[C1]] iter_args(%[[ARG:.+]] = %[[INIT]]) -> (f32) {
+// CHECK: %[[DELIN:.+]]:2 = affine.delinearize_index %[[IV]] into (3, 3)
+// CHECK: %[[COMPUTED:.+]] = arith.addi %[[DELIN]]#0, %[[C5]]
+// CHECK: %[[USED:.+]] = arith.muli %[[COMPUTED]], %[[C3]]
+// CHECK: %[[UPDATED:.+]] = "use"(%[[ARG]], %[[DELIN]]#0, %[[DELIN]]#1, %[[COMPUTED]], %[[USED]])
+// CHECK: scf.yield %[[UPDATED]]
+// CHECK: }
+// CHECK: return %[[RESULT]]
More information about the Mlir-commits
mailing list