[Mlir-commits] [mlir] [MLIR] Parallel loop fusion extended to interchanged loops. (PR #191245)
Dmitriy Smirnov
llvmlistbot at llvm.org
Thu Apr 9 09:55:26 PDT 2026
https://github.com/d-smirnov created https://github.com/llvm/llvm-project/pull/191245
This patch extends the fusion of two parallel loops to the case where the second parallel loop consists of two interchanged loops with the same iteration space.
>From 493470fe9928c18bb05216f6377f3608b204d51a Mon Sep 17 00:00:00 2001
From: Dmitriy Smirnov <dmitriy.smirnov at arm.com>
Date: Wed, 25 Mar 2026 14:47:57 +0000
Subject: [PATCH] [MLIR] Parallel loop fusion extended to interchanged loops.
This patch extends the fusion of two parallel loops to the case where
the second parallel loop consists of two interchanged loops with the
same iteration space.
Change-Id: I83c2c1a10698bf16c499d8da27d02c4c3f0f34a1
---
.../SCF/Transforms/ParallelLoopFusion.cpp | 54 ++++++++++++++++++-
.../Dialect/SCF/parallel-loop-fusion.mlir | 26 +++++++++
2 files changed, 78 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp
index 0b132e9109492..e0e6aad612aa8 100644
--- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp
@@ -733,6 +733,37 @@ static bool isFusionLegal(ParallelOp firstPloop, ParallelOp secondPloop,
firstToSecondPloopIndices, mayAlias, b);
}
+/// Builds a copy of the two-dimensional parallel loop `loop` in which the two
+/// induction variables are swapped: every use of the first induction variable
+/// in the body is remapped to the second induction variable of the new loop
+/// and vice versa.
+///
+/// The new loop is created right before `loop` and reuses its bounds, steps
+/// and init values in their original order, so the result is a true
+/// interchange only when both dimensions span the same iteration space (the
+/// caller is expected to have checked that). `loop` itself is not erased, but
+/// uses of its results are redirected to the new loop so that the caller can
+/// erase it safely. The builder's insertion point is left unchanged.
+///
+/// Returns std::nullopt if `loop` does not have exactly two dimensions.
+static std::optional<ParallelOp> interchangeLoops(OpBuilder &builder,
+                                                  ParallelOp &loop) {
+  if (loop.getNumLoops() != 2)
+    return std::nullopt;
+
+  // RAII guard: restores the caller's insertion point on every exit path.
+  OpBuilder::InsertionGuard guard(builder);
+  builder.setInsertionPoint(loop);
+  auto newOp = ParallelOp::create(builder, loop.getLoc(), loop.getLowerBound(),
+                                  loop.getUpperBound(), loop.getStep(),
+                                  loop.getInitVals(), nullptr);
+
+  // Old induction variables map onto the new ones in reversed order.
+  IRMapping mapping;
+  mapping.map(loop.getInductionVars(),
+              llvm::reverse(newOp.getInductionVars()));
+
+  // Copy the body. If the freshly built body already has a terminator, the
+  // original scf.reduce is skipped; otherwise (no terminator was created,
+  // which is expected when the loop carries reductions) it is cloned as well
+  // so that the reduction operands and regions are preserved.
+  Block *newBody = newOp.getBody();
+  const bool hasTerminator = !newBody->empty();
+  builder.setInsertionPointToStart(newBody);
+  for (Operation &op : *loop.getBody()) {
+    if (hasTerminator && isa<scf::ReduceOp>(op))
+      continue;
+    builder.clone(op, mapping);
+  }
+
+  // Redirect users of the old results; a no-op for loops without reductions.
+  loop->replaceAllUsesWith(newOp.getOperation());
+  return newOp;
+}
+
/// Prepend operations of firstPloop's body into secondPloop's body.
/// Update secondPloop with new loop.
static void fuseIfLegal(ParallelOp firstPloop, ParallelOp &secondPloop,
@@ -744,8 +775,27 @@ static void fuseIfLegal(ParallelOp firstPloop, ParallelOp &secondPloop,
firstToSecondPloopIndices.map(block1->getArguments(), block2->getArguments());
if (!isFusionLegal(firstPloop, secondPloop, firstToSecondPloopIndices,
- mayAlias, builder))
- return;
+ mayAlias, builder)) {
+ // If the second parallel loop consists of two loops with the same iteration
+ // space, interchange these loops and re-assess the possibility of fusion.
+ if (secondPloop.getNumLoops() == 2 &&
+ secondPloop.getUpperBound()[0] == secondPloop.getUpperBound()[1] &&
+ secondPloop.getLowerBound()[0] == secondPloop.getLowerBound()[1] &&
+ secondPloop.getStep()[0] == secondPloop.getStep()[1]) {
+ firstToSecondPloopIndices.clear();
+ firstToSecondPloopIndices.map(block1->getArguments(),
+ llvm::reverse(block2->getArguments()));
+ if (!isFusionLegal(firstPloop, secondPloop, firstToSecondPloopIndices,
+ mayAlias, builder))
+ return;
+ auto newLoop = interchangeLoops(builder, secondPloop);
+ secondPloop->erase();
+ secondPloop = *newLoop;
+ block2 = secondPloop.getBody();
+ } else {
+ return;
+ }
+ }
DominanceInfo dom;
// We are fusing first loop into second, make sure there are no users of the
diff --git a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
index d876062b704f2..ac6ab11963bd5 100644
--- a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
+++ b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
@@ -1223,3 +1223,29 @@ func.func @reductions_use_res_between(%A: memref<2x2xf32>, %B: memref<2x2xf32>)
// CHECK-LABEL: func @reductions_use_res_between
// CHECK: scf.parallel
// CHECK: scf.parallel
+
+// -----
+
+// At the same (i, j) the second loop loads a different element of the
+// intermediate buffer than the first loop stored; the accesses only line up
+// once the induction variables of the second loop are swapped.
+func.func @test_fuse_interchanged_loops(%arg0: memref<1x64xf32>) {
+  %zero = arith.constant 0 : index
+  %one = arith.constant 1 : index
+  %eight = arith.constant 8 : index
+  %src = memref.alloc() : memref<1x8x8xf32>
+  %tmp = memref.alloc() {alignment = 64 : i64} : memref<8x8x1xf32>
+  // Producer: stores src[0, i, j] into tmp[j, i, 0].
+  scf.parallel (%i, %j) = (%zero, %zero) to (%eight, %eight) step (%one, %one) {
+    %elem = memref.load %src[%zero, %i, %j] : memref<1x8x8xf32>
+    memref.store %elem, %tmp[%j, %i, %zero] : memref<8x8x1xf32>
+    scf.reduce
+  }
+  // Consumer: loads tmp[m, n, 0] and stores it at the flat offset m * 8 + n.
+  scf.parallel (%m, %n) = (%zero, %zero) to (%eight, %eight) step (%one, %one) {
+    %val = memref.load %tmp[%m, %n, %zero] : memref<8x8x1xf32>
+    %flat = affine.apply affine_map<(d0, d1) -> (d0 * 8 + d1)>(%m, %n)
+    memref.store %val, %arg0[%zero, %flat] : memref<1x64xf32>
+    scf.reduce
+  }
+  return
+}
+
+// CHECK-LABEL: func @test_fuse_interchanged_loops
+// CHECK: scf.parallel
+// CHECK-NOT: scf.parallel
More information about the Mlir-commits
mailing list