[Mlir-commits] [mlir] [mlir][scf] Fold away `scf.for` iter args cycles (PR #173436)
Ivan Butygin
llvmlistbot at llvm.org
Tue Dec 23 16:56:50 PST 2025
https://github.com/Hardcode84 created https://github.com/llvm/llvm-project/pull/173436
When iter args form a cycle through the region args/yields and all share the same init value, we can replace them all with that init value.
>From 90fa854df8a7dee03ae9d6f0372b2de9cacfe370 Mon Sep 17 00:00:00 2001
From: Ivan Butygin <ivan.butygin at gmail.com>
Date: Wed, 24 Dec 2025 01:41:50 +0100
Subject: [PATCH] [mlir][scf] Fold away `scf.for` iter args cycles
---
mlir/lib/Dialect/SCF/IR/SCF.cpp | 99 +++++++++++++++++++++++--
mlir/test/Dialect/SCF/canonicalize.mlir | 66 +++++++++++------
2 files changed, 136 insertions(+), 29 deletions(-)
diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp
index 652414f6cbe54..99802cfe4b662 100644
--- a/mlir/lib/Dialect/SCF/IR/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp
@@ -1001,7 +1001,7 @@ namespace {
// The implementation uses `inlineBlockBefore` to steal the content of the
// original ForOp and avoid cloning.
struct ForOpIterArgsFolder : public OpRewritePattern<scf::ForOp> {
- using OpRewritePattern<scf::ForOp>::OpRewritePattern;
+ using Base::Base;
LogicalResult matchAndRewrite(scf::ForOp forOp,
PatternRewriter &rewriter) const final {
@@ -1133,7 +1133,7 @@ struct ForOpIterArgsFolder : public OpRewritePattern<scf::ForOp> {
/// single-iteration loops with their bodies, and removes empty loops that
/// iterate at least once and only return values defined outside of the loop.
struct SimplifyTrivialLoops : public OpRewritePattern<ForOp> {
- using OpRewritePattern<ForOp>::OpRewritePattern;
+ using Base::Base;
LogicalResult matchAndRewrite(ForOp op,
PatternRewriter &rewriter) const override {
@@ -1204,7 +1204,7 @@ struct SimplifyTrivialLoops : public OpRewritePattern<ForOp> {
/// use_of(%1)
/// ```
struct ForOpTensorCastFolder : public OpRewritePattern<ForOp> {
- using OpRewritePattern<ForOp>::OpRewritePattern;
+ using Base::Base;
LogicalResult matchAndRewrite(ForOp op,
PatternRewriter &rewriter) const override {
@@ -1236,12 +1236,101 @@ struct ForOpTensorCastFolder : public OpRewritePattern<ForOp> {
}
};
+/// Rewrite pattern that folds away cycles among the iter args of an `scf.for`
+/// op.
+///
+/// A cycle is a set of iter arg positions in which every position yields the
+/// region iter arg of the next position unchanged, the last position feeds
+/// back into the first one, and all positions share the same init value. Such
+/// iter args are only permuted from one iteration to the next, so every one
+/// of them (and the corresponding loop result) is always equal to that init
+/// value.
+///
+/// ```
+/// %res:2 = scf.for ... iter_args(%arg0 = %init, %arg1 = %init) {
+///   ...
+///   use %arg0, %arg1
+///   scf.yield %arg1, %arg0
+/// }
+/// return %res#0, %res#1
+/// ```
+///
+/// folds into (once the other canonicalization patterns have removed the
+/// iter args this pattern leaves dead):
+///
+/// ```
+/// scf.for ... iter_args() {
+///   ...
+///   use %init, %init
+///   scf.yield
+/// }
+/// return %init, %init
+/// ```
+struct ForOpYieldCyclesFolder : public OpRewritePattern<ForOp> {
+  using Base::Base;
+
+  /// Replaces all uses of the region iter args and results that belong to a
+  /// closed cycle of length > 1 with the shared init value. Returns success
+  /// iff at least one such cycle was found.
+  LogicalResult matchAndRewrite(ForOp op,
+                                PatternRewriter &rewriter) const override {
+    ValueRange yieldedValues = op.getYieldedValues();
+    ValueRange initArgs = op.getInitArgs();
+    ValueRange results = op.getResults();
+    ValueRange regionIterArgs = op.getRegionIterArgs();
+    Block *body = op.getBody();
+
+    unsigned numYieldedValues = op.getNumRegionIterArgs();
+    unsigned numInductionVars = op.getNumInductionVars();
+
+    bool changed = false;
+    SmallVector<unsigned> cycle;
+    llvm::SmallBitVector visited(numYieldedValues, false);
+    for (auto start : llvm::seq(numYieldedValues)) {
+      if (visited[start])
+        continue;
+
+      // Follow the chain "position -> position of the iter arg it yields"
+      // starting at `start`. Every position has exactly one successor, so a
+      // position that was already visited by an earlier walk can never be part
+      // of a cycle through `start`.
+      cycle.clear();
+      unsigned current = start;
+      bool closedCycle = false;
+      Value initValue = initArgs[start];
+      while (!visited[current]) {
+        cycle.push_back(current);
+        visited[current] = true;
+
+        // The chain only continues through block arguments of the loop body.
+        auto arg = dyn_cast<BlockArgument>(yieldedValues[current]);
+        if (!arg || arg.getOwner() != body)
+          break;
+
+        // The induction variable is a block argument of the body as well, but
+        // it is not an iter arg. Reject it explicitly; otherwise the
+        // subtraction below would wrap around and index out of bounds.
+        unsigned argNumber = arg.getArgNumber();
+        if (argNumber < numInductionVars)
+          break;
+        unsigned next = argNumber - numInductionVars;
+
+        // All members of the cycle must start from the same init value.
+        if (initArgs[next] != initValue)
+          break;
+
+        current = next;
+
+        // Completed the cycle.
+        if (current == start) {
+          closedCycle = true;
+          break;
+        }
+      }
+
+      // Only a chain that actually returned to `start` is a cycle. A chain
+      // that merely ran into a position visited by a previous walk may lead
+      // to a position carrying an arbitrary value, so it must not be folded.
+      // Cycles of length 1 (an iter arg yielding itself) are not handled
+      // here.
+      if (!closedCycle || cycle.size() <= 1)
+        continue;
+
+      // All values in the cycle are always equal to initValue.
+      changed = true;
+      for (unsigned idx : cycle) {
+        // This will leave region args and results dead so other
+        // canonicalization patterns can clean them up.
+        rewriter.replaceAllUsesWith(regionIterArgs[idx], initValue);
+        rewriter.replaceAllUsesWith(results[idx], initValue);
+      }
+    }
+    return success(changed);
+  }
+};
+
+
} // namespace
void ForOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {
- results.add<ForOpIterArgsFolder, SimplifyTrivialLoops, ForOpTensorCastFolder>(
- context);
+ results.add<ForOpIterArgsFolder, SimplifyTrivialLoops, ForOpTensorCastFolder,
+ ForOpYieldCyclesFolder>(context);
}
std::optional<APInt> ForOp::getConstantStep() {
diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir
index ac590fc0c47b9..e69bbff0254e1 100644
--- a/mlir/test/Dialect/SCF/canonicalize.mlir
+++ b/mlir/test/Dialect/SCF/canonicalize.mlir
@@ -1665,11 +1665,11 @@ func.func @func_execute_region_inline_multi_yield() {
module {
func.func private @foo()->()
func.func private @execute_region_yeilding_external_value() -> memref<1x60xui8> {
- %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8>
- %1 = scf.execute_region -> memref<1x60xui8> no_inline {
+ %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8>
+ %1 = scf.execute_region -> memref<1x60xui8> no_inline {
func.call @foo():()->()
scf.yield %alloc: memref<1x60xui8>
- }
+ }
return %1 : memref<1x60xui8>
}
}
@@ -1688,12 +1688,12 @@ func.func private @execute_region_yeilding_external_value() -> memref<1x60xui8>
module {
func.func private @foo()->()
func.func private @execute_region_yeilding_external_and_local_values() -> (memref<1x60xui8>, memref<1x120xui8>) {
- %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8>
- %1, %2 = scf.execute_region -> (memref<1x60xui8>, memref<1x120xui8>) no_inline {
+ %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8>
+ %1, %2 = scf.execute_region -> (memref<1x60xui8>, memref<1x120xui8>) no_inline {
%alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<1x120xui8>
func.call @foo():()->()
scf.yield %alloc, %alloc_1: memref<1x60xui8>, memref<1x120xui8>
- }
+ }
return %1, %2 : memref<1x60xui8>, memref<1x120xui8>
}
}
@@ -1716,18 +1716,18 @@ func.func private @execute_region_yeilding_external_and_local_values() -> (memre
module {
func.func private @foo()->()
func.func private @execute_region_multiple_yields_same_operands() -> (memref<1x60xui8>, memref<1x120xui8>) {
- %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8>
- %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<1x120xui8>
+ %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8>
+ %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<1x120xui8>
%1, %2 = scf.execute_region -> (memref<1x60xui8>, memref<1x120xui8>) no_inline {
%c = "test.cmp"() : () -> i1
cf.cond_br %c, ^bb2, ^bb3
- ^bb2:
+ ^bb2:
func.call @foo():()->()
scf.yield %alloc, %alloc_1 : memref<1x60xui8>, memref<1x120xui8>
- ^bb3:
- func.call @foo():()->()
+ ^bb3:
+ func.call @foo():()->()
scf.yield %alloc, %alloc_1 : memref<1x60xui8>, memref<1x120xui8>
- }
+ }
return %1, %2 : memref<1x60xui8>, memref<1x120xui8>
}
}
@@ -1746,19 +1746,19 @@ module {
module {
func.func private @foo()->()
func.func private @execute_region_multiple_yields_different_operands() -> (memref<1x60xui8>, memref<1x120xui8>) {
- %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8>
- %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<1x120xui8>
- %alloc_2 = memref.alloc() {alignment = 64 : i64} : memref<1x120xui8>
+ %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8>
+ %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<1x120xui8>
+ %alloc_2 = memref.alloc() {alignment = 64 : i64} : memref<1x120xui8>
%1, %2 = scf.execute_region -> (memref<1x60xui8>, memref<1x120xui8>) no_inline {
%c = "test.cmp"() : () -> i1
cf.cond_br %c, ^bb2, ^bb3
- ^bb2:
+ ^bb2:
func.call @foo():()->()
scf.yield %alloc, %alloc_1 : memref<1x60xui8>, memref<1x120xui8>
- ^bb3:
- func.call @foo():()->()
+ ^bb3:
+ func.call @foo():()->()
scf.yield %alloc, %alloc_2 : memref<1x60xui8>, memref<1x120xui8>
- }
+ }
return %1, %2 : memref<1x60xui8>, memref<1x120xui8>
}
}
@@ -1778,18 +1778,18 @@ module {
module {
func.func private @foo()->()
func.func private @execute_region_multiple_yields_different_operands() -> (memref<1x60xui8>) {
- %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8>
- %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8>
+ %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8>
+ %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<1x60xui8>
%1 = scf.execute_region -> (memref<1x60xui8>) no_inline {
%c = "test.cmp"() : () -> i1
cf.cond_br %c, ^bb2, ^bb3
- ^bb2:
+ ^bb2:
func.call @foo():()->()
scf.yield %alloc : memref<1x60xui8>
- ^bb3:
+ ^bb3:
func.call @foo():()->()
scf.yield %alloc_1 : memref<1x60xui8>
- }
+ }
return %1 : memref<1x60xui8>
}
}
@@ -2171,3 +2171,21 @@ func.func @scf_for_all_step_size_0() {
}
return
}
+
+// -----
+
+func.func private @side_effect()
+
+// CHECK-LABEL: func @iter_args_cycles
+// CHECK-SAME: (%[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index, %[[A:.*]]: i32, %[[B:.*]]: i64, %[[C:.*]]: f32)
+// CHECK: scf.for %[[IV:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] {
+// CHECK: func.call @side_effect() : () -> ()
+// CHECK-NOT: yield
+// CHECK: return %[[A]], %[[B]], %[[A]], %[[B]], %[[B]], %[[C]] : i32, i64, i32, i64, i64, f32
+func.func @iter_args_cycles(%lb : index, %ub : index, %step : index, %a : i32, %b : i64, %c : f32) -> (i32, i64, i32, i64, i64, f32) {
+ %res:6 = scf.for %i = %lb to %ub step %step iter_args(%0 = %a, %1 = %b, %2 = %a, %3 = %b, %4 = %b, %5 = %c) -> (i32, i64, i32, i64, i64, f32) {
+ func.call @side_effect() : () -> ()
+ scf.yield %2, %4, %0, %1, %3, %5 : i32, i64, i32, i64, i64, f32
+ }
+ return %res#0, %res#1, %res#2, %res#3, %res#4, %res#5 : i32, i64, i32, i64, i64, f32
+}
More information about the Mlir-commits
mailing list