[Mlir-commits] [mlir] 25fcc87 - [acc] Fix acc.loop to scf utilities (#178809)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Fri Jan 30 08:02:35 PST 2026


Author: Razvan Lupusoru
Date: 2026-01-30T08:02:30-08:00
New Revision: 25fcc8702ddf9254ea5203348cea4681f973e93d

URL: https://github.com/llvm/llvm-project/commit/25fcc8702ddf9254ea5203348cea4681f973e93d
DIFF: https://github.com/llvm/llvm-project/commit/25fcc8702ddf9254ea5203348cea4681f973e93d.diff

LOG: [acc] Fix acc.loop to scf utilities (#178809)

Fixes a problem encountered with enabling coalesceLoops when bounds were
constructed inside expanded loops. Additionally, ensures that all loop
utilities use rewriter instead of their own builders for proper
tracking.

Added: 
    

Modified: 
    mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
    mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
index 477ee9ee48358..e271052eade60 100644
--- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
+++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
@@ -109,12 +109,14 @@ static void normalizeIVUses(OpBuilder &b, Location loc, Value iv, Value origLB,
 /// Returns the insertion point after the cloned operations.
 static Block::iterator cloneACCRegionInto(Region *src, Block *dest,
                                           Block::iterator insertionPoint,
-                                          IRMapping &mapping) {
+                                          IRMapping &mapping,
+                                          RewriterBase &rewriter) {
   assert(src->hasOneBlock() && "expected single-block region");
 
   Region *insertRegion = dest->getParent();
-  Block *postInsertBlock = dest->splitBlock(insertionPoint);
-  src->cloneInto(insertRegion, postInsertBlock->getIterator(), mapping);
+  Block *postInsertBlock = rewriter.splitBlock(dest, insertionPoint);
+  rewriter.cloneRegionBefore(*src, *insertRegion,
+                             postInsertBlock->getIterator(), mapping);
 
   auto lastNewBlock = std::prev(postInsertBlock->getIterator());
 
@@ -123,23 +125,20 @@ static Block::iterator cloneACCRegionInto(Region *src, Block *dest,
 
   if (auto yieldOp = dyn_cast<acc::YieldOp>(terminator)) {
     newInsertionPoint = std::prev(yieldOp->getIterator());
-    yieldOp.erase();
+    rewriter.eraseOp(yieldOp);
   } else if (auto terminatorOp = dyn_cast<acc::TerminatorOp>(terminator)) {
     newInsertionPoint = std::prev(terminatorOp->getIterator());
-    terminatorOp.erase();
+    rewriter.eraseOp(terminatorOp);
   } else {
     llvm_unreachable("unexpected terminator in ACC region");
   }
 
   // Merge last block with the postInsertBlock
-  lastNewBlock->getOperations().splice(lastNewBlock->end(),
-                                       postInsertBlock->getOperations());
-  postInsertBlock->erase();
+  rewriter.mergeBlocks(postInsertBlock, &*lastNewBlock);
 
   // Merge first block with original dest block
-  auto firstNewBlock = std::next(dest->getIterator());
-  dest->getOperations().splice(dest->end(), firstNewBlock->getOperations());
-  firstNewBlock->erase();
+  Block *firstNewBlock = &*std::next(dest->getIterator());
+  rewriter.mergeBlocks(firstNewBlock, dest);
 
   return newInsertionPoint;
 }
@@ -193,21 +192,32 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
   OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPoint(loopOp);
 
+  // First, compute ALL loop bounds at the current insertion point (before
+  // any ForOp). This ensures all bounds are defined in the outer scope,
+  // which is required for coalesceLoops to work correctly.
+  SmallVector<Value> lowerBounds, upperBounds, steps;
   for (BlockArgument iv : loopOp.getBody().getArguments()) {
     size_t idx = iv.getArgNumber();
-
-    // For nested loops, insert inside the previous loop's body
-    if (idx > 0)
-      rewriter.setInsertionPointToStart(forOps.back().getBody());
-
     Value newLowerBound = getValueOrCreateCastToIndexLike(
         rewriter, loc, indexType, loopOp.getLowerbound()[idx]);
     Value newUpperBound = getExclusiveUpperBoundAsIndex(loopOp, idx, rewriter);
     Value newStep = getValueOrCreateCastToIndexLike(rewriter, loc, indexType,
                                                     loopOp.getStep()[idx]);
+    lowerBounds.push_back(newLowerBound);
+    upperBounds.push_back(newUpperBound);
+    steps.push_back(newStep);
+  }
+
+  // Now create the nested ForOps using the pre-computed bounds
+  for (BlockArgument iv : loopOp.getBody().getArguments()) {
+    size_t idx = iv.getArgNumber();
+
+    // For nested loops, insert inside the previous loop's body
+    if (idx > 0)
+      rewriter.setInsertionPointToStart(forOps.back().getBody());
 
-    scf::ForOp forOp = scf::ForOp::create(rewriter, loc, newLowerBound,
-                                          newUpperBound, newStep);
+    scf::ForOp forOp = scf::ForOp::create(rewriter, loc, lowerBounds[idx],
+                                          upperBounds[idx], steps[idx]);
     forOps.push_back(forOp);
     mapping.map(iv, forOp.getInductionVar());
   }
@@ -223,11 +233,11 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
 
   // Clone the loop body into the innermost scf.for
   cloneACCRegionInto(&loopOp.getRegion(), forOps.back().getBody(),
-                     rewriter.getInsertionPoint(), mapping);
+                     rewriter.getInsertionPoint(), mapping, rewriter);
 
   // Optionally collapse nested loops
   if (enableCollapse && forOps.size() > 1)
-    if (failed(coalesceLoops(forOps)))
+    if (failed(coalesceLoops(rewriter, forOps)))
       loopOp.emitError("failed to collapse acc.loop");
 
   return forOps.front();
@@ -283,7 +293,7 @@ scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp,
     }
   } else {
     cloneACCRegionInto(&loopOp.getRegion(), parallelOp.getBody(),
-                       rewriter.getInsertionPoint(), mapping);
+                       rewriter.getInsertionPoint(), mapping, rewriter);
   }
 
   // Denormalize IV uses

diff  --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
index 251a2ac6078bd..0176f64c8db3b 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
@@ -309,6 +309,36 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForNoCollapse) {
   EXPECT_TRUE(hasNestedFor);
 }
 
+TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForWithCollapseAndDynamicBounds) {
+  // Test with dynamic (non-constant) bounds to ensure all bounds are computed
+  // before any ForOp is created. This is required for coalesceLoops to work
+  // correctly (called when enableCollapse is true) - if inner loop bounds were
+  // computed inside the outer loop, coalesceLoops would create ops that
+  // reference values from a nested region.
+  auto [module, funcOp] = createModuleWithFuncArgs(
+      {b.getIndexType(), b.getIndexType(), b.getIndexType(), b.getIndexType()});
+
+  // Use function arguments as dynamic bounds
+  Value lb0 = funcOp.getArgument(0);
+  Value ub0 = funcOp.getArgument(1);
+  Value lb1 = funcOp.getArgument(2);
+  Value ub1 = funcOp.getArgument(3);
+  Value c1 = createIndexConstant(1);
+
+  acc::LoopOp loopOp = createLoopOp({lb0, lb1}, {ub0, ub1}, {c1, c1});
+  scf::ForOp forOp = convertACCLoopToSCFFor(loopOp, b, /*enableCollapse=*/true);
+
+  ASSERT_TRUE(forOp);
+
+  // With collapse, there should be NO nested for loops
+  bool hasNestedFor = false;
+  forOp.getBody()->walk([&](scf::ForOp) { hasNestedFor = true; });
+  EXPECT_FALSE(hasNestedFor);
+
+  // Verify the IR is valid
+  EXPECT_TRUE(module->verify().succeeded());
+}
+
 TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForExclusiveUpperBound) {
   auto [module, funcOp] = createModuleWithFunc();
 


        


More information about the Mlir-commits mailing list