[Mlir-commits] [mlir] 25fcc87 - [acc] Fix acc.loop to scf utilities (#178809)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Jan 30 08:02:35 PST 2026
Author: Razvan Lupusoru
Date: 2026-01-30T08:02:30-08:00
New Revision: 25fcc8702ddf9254ea5203348cea4681f973e93d
URL: https://github.com/llvm/llvm-project/commit/25fcc8702ddf9254ea5203348cea4681f973e93d
DIFF: https://github.com/llvm/llvm-project/commit/25fcc8702ddf9254ea5203348cea4681f973e93d.diff
LOG: [acc] Fix acc.loop to scf utilities (#178809)
Fixes a problem encountered with enabling coalesceLoops when bounds were
constructed inside expanded loops. Additionally, ensures that all loop
utilities use rewriter instead of their own builders for proper
tracking.
Added:
Modified:
mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
index 477ee9ee48358..e271052eade60 100644
--- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
+++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
@@ -109,12 +109,14 @@ static void normalizeIVUses(OpBuilder &b, Location loc, Value iv, Value origLB,
/// Returns the insertion point after the cloned operations.
static Block::iterator cloneACCRegionInto(Region *src, Block *dest,
Block::iterator insertionPoint,
- IRMapping &mapping) {
+ IRMapping &mapping,
+ RewriterBase &rewriter) {
assert(src->hasOneBlock() && "expected single-block region");
Region *insertRegion = dest->getParent();
- Block *postInsertBlock = dest->splitBlock(insertionPoint);
- src->cloneInto(insertRegion, postInsertBlock->getIterator(), mapping);
+ Block *postInsertBlock = rewriter.splitBlock(dest, insertionPoint);
+ rewriter.cloneRegionBefore(*src, *insertRegion,
+ postInsertBlock->getIterator(), mapping);
auto lastNewBlock = std::prev(postInsertBlock->getIterator());
@@ -123,23 +125,20 @@ static Block::iterator cloneACCRegionInto(Region *src, Block *dest,
if (auto yieldOp = dyn_cast<acc::YieldOp>(terminator)) {
newInsertionPoint = std::prev(yieldOp->getIterator());
- yieldOp.erase();
+ rewriter.eraseOp(yieldOp);
} else if (auto terminatorOp = dyn_cast<acc::TerminatorOp>(terminator)) {
newInsertionPoint = std::prev(terminatorOp->getIterator());
- terminatorOp.erase();
+ rewriter.eraseOp(terminatorOp);
} else {
llvm_unreachable("unexpected terminator in ACC region");
}
// Merge last block with the postInsertBlock
- lastNewBlock->getOperations().splice(lastNewBlock->end(),
- postInsertBlock->getOperations());
- postInsertBlock->erase();
+ rewriter.mergeBlocks(postInsertBlock, &*lastNewBlock);
// Merge first block with original dest block
- auto firstNewBlock = std::next(dest->getIterator());
- dest->getOperations().splice(dest->end(), firstNewBlock->getOperations());
- firstNewBlock->erase();
+ Block *firstNewBlock = &*std::next(dest->getIterator());
+ rewriter.mergeBlocks(firstNewBlock, dest);
return newInsertionPoint;
}
@@ -193,21 +192,32 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
OpBuilder::InsertionGuard guard(rewriter);
rewriter.setInsertionPoint(loopOp);
+ // First, compute ALL loop bounds at the current insertion point (before
+ // any ForOp). This ensures all bounds are defined in the outer scope,
+ // which is required for coalesceLoops to work correctly.
+ SmallVector<Value> lowerBounds, upperBounds, steps;
for (BlockArgument iv : loopOp.getBody().getArguments()) {
size_t idx = iv.getArgNumber();
-
- // For nested loops, insert inside the previous loop's body
- if (idx > 0)
- rewriter.setInsertionPointToStart(forOps.back().getBody());
-
Value newLowerBound = getValueOrCreateCastToIndexLike(
rewriter, loc, indexType, loopOp.getLowerbound()[idx]);
Value newUpperBound = getExclusiveUpperBoundAsIndex(loopOp, idx, rewriter);
Value newStep = getValueOrCreateCastToIndexLike(rewriter, loc, indexType,
loopOp.getStep()[idx]);
+ lowerBounds.push_back(newLowerBound);
+ upperBounds.push_back(newUpperBound);
+ steps.push_back(newStep);
+ }
+
+ // Now create the nested ForOps using the pre-computed bounds
+ for (BlockArgument iv : loopOp.getBody().getArguments()) {
+ size_t idx = iv.getArgNumber();
+
+ // For nested loops, insert inside the previous loop's body
+ if (idx > 0)
+ rewriter.setInsertionPointToStart(forOps.back().getBody());
- scf::ForOp forOp = scf::ForOp::create(rewriter, loc, newLowerBound,
- newUpperBound, newStep);
+ scf::ForOp forOp = scf::ForOp::create(rewriter, loc, lowerBounds[idx],
+ upperBounds[idx], steps[idx]);
forOps.push_back(forOp);
mapping.map(iv, forOp.getInductionVar());
}
@@ -223,11 +233,11 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
// Clone the loop body into the innermost scf.for
cloneACCRegionInto(&loopOp.getRegion(), forOps.back().getBody(),
- rewriter.getInsertionPoint(), mapping);
+ rewriter.getInsertionPoint(), mapping, rewriter);
// Optionally collapse nested loops
if (enableCollapse && forOps.size() > 1)
- if (failed(coalesceLoops(forOps)))
+ if (failed(coalesceLoops(rewriter, forOps)))
loopOp.emitError("failed to collapse acc.loop");
return forOps.front();
@@ -283,7 +293,7 @@ scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp,
}
} else {
cloneACCRegionInto(&loopOp.getRegion(), parallelOp.getBody(),
- rewriter.getInsertionPoint(), mapping);
+ rewriter.getInsertionPoint(), mapping, rewriter);
}
// Denormalize IV uses
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
index 251a2ac6078bd..0176f64c8db3b 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
@@ -309,6 +309,36 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForNoCollapse) {
EXPECT_TRUE(hasNestedFor);
}
+TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForWithCollapseAndDynamicBounds) {
+ // Test with dynamic (non-constant) bounds to ensure all bounds are computed
+ // before any ForOp is created. This is required for coalesceLoops to work
+ // correctly (called when enableCollapse is true) - if inner loop bounds were
+ // computed inside the outer loop, coalesceLoops would create ops that
+ // reference values from a nested region.
+ auto [module, funcOp] = createModuleWithFuncArgs(
+ {b.getIndexType(), b.getIndexType(), b.getIndexType(), b.getIndexType()});
+
+ // Use function arguments as dynamic bounds
+ Value lb0 = funcOp.getArgument(0);
+ Value ub0 = funcOp.getArgument(1);
+ Value lb1 = funcOp.getArgument(2);
+ Value ub1 = funcOp.getArgument(3);
+ Value c1 = createIndexConstant(1);
+
+ acc::LoopOp loopOp = createLoopOp({lb0, lb1}, {ub0, ub1}, {c1, c1});
+ scf::ForOp forOp = convertACCLoopToSCFFor(loopOp, b, /*enableCollapse=*/true);
+
+ ASSERT_TRUE(forOp);
+
+ // With collapse, there should be NO nested for loops
+ bool hasNestedFor = false;
+ forOp.getBody()->walk([&](scf::ForOp) { hasNestedFor = true; });
+ EXPECT_FALSE(hasNestedFor);
+
+ // Verify the IR is valid
+ EXPECT_TRUE(module->verify().succeeded());
+}
+
TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForExclusiveUpperBound) {
auto [module, funcOp] = createModuleWithFunc();
More information about the Mlir-commits
mailing list