[Mlir-commits] [mlir] [mlir][OpenACC] Normalize loop bounds in convertACCLoopToSCFFor for negative steps (PR #184935)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Mon Mar 9 10:55:13 PDT 2026


https://github.com/khaki3 updated https://github.com/llvm/llvm-project/pull/184935

>From 24122a8eef48c6e82e0d2beb7be0f31b5e136dee Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Thu, 5 Mar 2026 18:19:06 -0800
Subject: [PATCH 1/3] [mlir][OpenACC] Normalize loop bounds in
 convertACCLoopToSCFFor

convertACCLoopToSCFFor was passing acc.loop bounds directly to scf.for,
which can produce an scf.for with a negative step when the source is a
Fortran DO loop counting down (e.g. DO k = n, 1, -1). scf.for requires
a positive step, so this led to invalid IR and downstream crashes.

Normalize all loops to lb=0, step=1, ub=tripCount and denormalize IV
uses inside the body, matching what convertACCLoopToSCFParallel already
does.

Made-with: Cursor
---
 .../OpenACC/Utils/OpenACCUtilsLoop.cpp        | 45 +++++++----
 .../Dialect/OpenACC/OpenACCUtilsLoopTest.cpp  | 75 ++++++++++++++-----
 2 files changed, 85 insertions(+), 35 deletions(-)

diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
index de4f0a26a0f11..9325478642040 100644
--- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
+++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
@@ -208,7 +208,6 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
          "loops");
 
   Location loc = loopOp->getLoc();
-  Type indexType = rewriter.getIndexType();
 
   // Create nested scf.for loops and build IR mapping for IVs
   IRMapping mapping;
@@ -218,23 +217,27 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
   OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPoint(loopOp);
 
-  // First, compute ALL loop bounds at the current insertion point (before
-  // any ForOp). This ensures all bounds are defined in the outer scope,
-  // which is required for coalesceLoops to work correctly.
-  SmallVector<Value> lowerBounds, upperBounds, steps;
+  // Normalize all loops: lb=0, step=1, ub=tripCount.
+  // This is required because scf.for mandates a positive step, but acc.loop
+  // may have a negative step (e.g. Fortran DO loops counting down).
+  Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
+  Value one = arith::ConstantIndexOp::create(rewriter, loc, 1);
+
+  SmallVector<Value> tripCounts;
   for (BlockArgument iv : loopOp.getBody().getArguments()) {
     size_t idx = iv.getArgNumber();
-    Value newLowerBound = getValueOrCreateCastToIndexLike(
-        rewriter, loc, indexType, loopOp.getLowerbound()[idx]);
-    Value newUpperBound = getExclusiveUpperBoundAsIndex(loopOp, idx, rewriter);
-    Value newStep = getValueOrCreateCastToIndexLike(rewriter, loc, indexType,
-                                                    loopOp.getStep()[idx]);
-    lowerBounds.push_back(newLowerBound);
-    upperBounds.push_back(newUpperBound);
-    steps.push_back(newStep);
+    bool inclusiveUpperbound = false;
+    if (loopOp.getInclusiveUpperbound().has_value())
+      inclusiveUpperbound =
+          loopOp.getInclusiveUpperboundAttr().asArrayRef()[idx];
+
+    Value tc = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
+                                  loopOp.getUpperbound()[idx],
+                                  loopOp.getStep()[idx], inclusiveUpperbound);
+    tripCounts.push_back(tc);
   }
 
-  // Now create the nested ForOps using the pre-computed bounds
+  // Now create the nested ForOps using normalized bounds
   for (BlockArgument iv : loopOp.getBody().getArguments()) {
     size_t idx = iv.getArgNumber();
 
@@ -242,8 +245,8 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
     if (idx > 0)
       rewriter.setInsertionPointToStart(forOps.back().getBody());
 
-    scf::ForOp forOp = scf::ForOp::create(rewriter, loc, lowerBounds[idx],
-                                          upperBounds[idx], steps[idx]);
+    scf::ForOp forOp =
+        scf::ForOp::create(rewriter, loc, zero, tripCounts[idx], one);
     forOps.push_back(forOp);
     mapping.map(iv, forOp.getInductionVar());
   }
@@ -261,6 +264,16 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
   cloneACCRegionIntoForLoop(&loopOp.getRegion(), forOps.back().getBody(),
                             rewriter.getInsertionPoint(), mapping, rewriter);
 
+  // Denormalize IV uses: original_iv = normalized_iv * orig_step + orig_lb
+  for (size_t idx = 0; idx < forOps.size(); ++idx) {
+    Value iv = forOps[idx].getInductionVar();
+    if (!iv.use_empty()) {
+      rewriter.setInsertionPointToStart(forOps[idx].getBody());
+      normalizeIVUses(rewriter, loc, iv, loopOp.getLowerbound()[idx],
+                      loopOp.getStep()[idx]);
+    }
+  }
+
   // Optionally collapse nested loops
   if (enableCollapse && forOps.size() > 1)
     if (failed(coalesceLoops(rewriter, forOps)))
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
index 3b1558e4f4146..e54a4f5bb4f1f 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
@@ -231,10 +231,9 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopWithI32Bounds) {
 TEST_F(OpenACCUtilsLoopTest, ConvertLoopWithNonConstantBounds) {
   auto [module, funcOp] =
       createModuleWithFuncArgs({b.getIndexType(), b.getIndexType()});
-  Block &entryBlock = funcOp.getBody().front();
 
-  Value lb = entryBlock.getArgument(0);
-  Value ub = entryBlock.getArgument(1);
+  Value lb = funcOp.getArgument(0);
+  Value ub = funcOp.getArgument(1);
   Value step = createIndexConstant(1);
 
   acc::LoopOp loopOp = createLoopOp({lb}, {ub}, {step});
@@ -243,20 +242,17 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopWithNonConstantBounds) {
 
   ASSERT_TRUE(forOp);
 
-  // Lower bound should be the function argument (no cast needed for index)
-  EXPECT_EQ(forOp.getLowerBound(), lb);
+  // Normalized: lb=0, step=1, ub=tripCount (computed from dynamic bounds)
+  auto lbConst = getConstantIndex(forOp.getLowerBound());
+  ASSERT_TRUE(lbConst.has_value());
+  EXPECT_EQ(*lbConst, 0);
 
-  // Upper bound should be ub + 1 (for inclusive -> exclusive conversion)
-  // Check it's an addi of ub and 1
-  auto ubAddOp = forOp.getUpperBound().getDefiningOp<arith::AddIOp>();
-  ASSERT_TRUE(ubAddOp);
-  EXPECT_EQ(ubAddOp.getLhs(), ub);
-  auto oneConst = getConstantIndex(ubAddOp.getRhs());
-  ASSERT_TRUE(oneConst.has_value());
-  EXPECT_EQ(*oneConst, 1);
+  auto stepConst = getConstantIndex(forOp.getStep());
+  ASSERT_TRUE(stepConst.has_value());
+  EXPECT_EQ(*stepConst, 1);
 
-  // Step should be the constant 1
-  EXPECT_EQ(forOp.getStep(), step);
+  // Upper bound is a computed trip count (not a constant)
+  EXPECT_FALSE(getConstantIndex(forOp.getUpperBound()).has_value());
 }
 
 TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForWithCollapse) {
@@ -353,10 +349,51 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForExclusiveUpperBound) {
 
   ASSERT_TRUE(forOp);
 
-  // With exclusive upper bound, ub should remain 10 (no +1 adjustment)
-  EXPECT_EQ(forOp.getLowerBound(), c0);
-  EXPECT_EQ(forOp.getUpperBound(), c10);
-  EXPECT_EQ(forOp.getStep(), c1);
+  // Normalized: lb=0, step=1, ub=tripCount
+  // For exclusive [0, 10) with step 1: tripCount = (9 - 0 + 1) / 1 = 10
+  auto lbConst = getConstantIndex(forOp.getLowerBound());
+  ASSERT_TRUE(lbConst.has_value());
+  EXPECT_EQ(*lbConst, 0);
+
+  auto ubConst = getConstantIndex(forOp.getUpperBound());
+  ASSERT_TRUE(ubConst.has_value());
+  EXPECT_EQ(*ubConst, 10);
+
+  auto stepConst = getConstantIndex(forOp.getStep());
+  ASSERT_TRUE(stepConst.has_value());
+  EXPECT_EQ(*stepConst, 1);
+}
+
+TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForNegativeStep) {
+  auto [module, funcOp] = createModuleWithFunc();
+
+  Value c10 = createIndexConstant(10);
+  Value c1 = createIndexConstant(1);
+  Value cNeg1 = createIndexConstant(-1);
+
+  // acc.loop from 10 to 1 step -1 (inclusive), like Fortran DO k = 10, 1, -1
+  acc::LoopOp loopOp = createLoopOp({c10}, {c1}, {cNeg1});
+  scf::ForOp forOp =
+      convertACCLoopToSCFFor(loopOp, b, /*enableCollapse=*/false);
+
+  ASSERT_TRUE(forOp);
+
+  // Normalized: lb=0, step=1, ub=tripCount
+  // tripCount = (1 - 10 + (-1)) / (-1) = (-10) / (-1) = 10
+  auto lbConst = getConstantIndex(forOp.getLowerBound());
+  ASSERT_TRUE(lbConst.has_value());
+  EXPECT_EQ(*lbConst, 0);
+
+  auto ubConst = getConstantIndex(forOp.getUpperBound());
+  ASSERT_TRUE(ubConst.has_value());
+  EXPECT_EQ(*ubConst, 10);
+
+  auto stepConst = getConstantIndex(forOp.getStep());
+  ASSERT_TRUE(stepConst.has_value());
+  EXPECT_EQ(*stepConst, 1);
+
+  EXPECT_TRUE(isa<scf::YieldOp>(forOp.getBody()->getTerminator()));
+  EXPECT_TRUE(module->verify().succeeded());
 }
 
 //===----------------------------------------------------------------------===//

>From b4ad8533d57de2d359a417af1f42134f2663c45e Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Thu, 5 Mar 2026 18:39:27 -0800
Subject: [PATCH 2/3] [mlir][OpenACC] Normalize loop bounds in
 convertACCLoopToSCFFor only for negative steps

convertACCLoopToSCFFor was passing acc.loop bounds directly to scf.for,
which produces an scf.for with a negative step when the source is a
Fortran DO loop counting down (e.g. DO k = n, 1, -1). scf.for requires
a positive step, so this led to invalid IR and downstream crashes.

Only normalize loops with a known negative constant step to lb=0,
step=1, ub=tripCount with IV denormalization. Loops with positive or
dynamic steps keep the original direct-bounds behavior unchanged.

Made-with: Cursor
---
 .../OpenACC/Utils/OpenACCUtilsLoop.cpp        | 69 ++++++++++++++-----
 .../Dialect/OpenACC/OpenACCUtilsLoopTest.cpp  | 40 +++++------
 2 files changed, 67 insertions(+), 42 deletions(-)

diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
index 9325478642040..94fc9b8de39ef 100644
--- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
+++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
@@ -201,6 +201,16 @@ wrapMultiBlockRegionWithSCFExecuteRegion(Region &region, IRMapping &mapping,
   return exeRegionOp;
 }
 
+/// Return true if \p v is a constant index with a negative value.
+static bool isNegativeConstantIndex(Value v) {
+  if (auto cst = v.getDefiningOp<arith::ConstantIndexOp>())
+    return cst.value() < 0;
+  if (auto cst = v.getDefiningOp<arith::ConstantOp>())
+    if (auto intAttr = dyn_cast<IntegerAttr>(cst.getValue()))
+      return intAttr.getInt() < 0;
+  return false;
+}
+
 scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
                                   bool enableCollapse) {
   assert(!loopOp.getUnstructured() &&
@@ -208,6 +218,7 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
          "loops");
 
   Location loc = loopOp->getLoc();
+  Type indexType = rewriter.getIndexType();
 
   // Create nested scf.for loops and build IR mapping for IVs
   IRMapping mapping;
@@ -217,27 +228,47 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
   OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPoint(loopOp);
 
-  // Normalize all loops: lb=0, step=1, ub=tripCount.
-  // This is required because scf.for mandates a positive step, but acc.loop
-  // may have a negative step (e.g. Fortran DO loops counting down).
-  Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
-  Value one = arith::ConstantIndexOp::create(rewriter, loc, 1);
+  // Determine which loops need normalization (negative constant step).
+  // scf.for requires a positive step, so loops with a negative step must be
+  // normalized to lb=0, step=1, ub=tripCount with IV denormalization.
+  SmallVector<bool> needsNormalize;
+  for (BlockArgument iv : loopOp.getBody().getArguments()) {
+    size_t idx = iv.getArgNumber();
+    needsNormalize.push_back(isNegativeConstantIndex(loopOp.getStep()[idx]));
+  }
 
-  SmallVector<Value> tripCounts;
+  // Pre-compute all loop bounds before creating any ForOp.
+  SmallVector<Value> lowerBounds, upperBounds, steps;
   for (BlockArgument iv : loopOp.getBody().getArguments()) {
     size_t idx = iv.getArgNumber();
-    bool inclusiveUpperbound = false;
-    if (loopOp.getInclusiveUpperbound().has_value())
-      inclusiveUpperbound =
-          loopOp.getInclusiveUpperboundAttr().asArrayRef()[idx];
 
-    Value tc = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
-                                  loopOp.getUpperbound()[idx],
-                                  loopOp.getStep()[idx], inclusiveUpperbound);
-    tripCounts.push_back(tc);
+    if (needsNormalize[idx]) {
+      bool inclusiveUpperbound = false;
+      if (loopOp.getInclusiveUpperbound().has_value())
+        inclusiveUpperbound =
+            loopOp.getInclusiveUpperboundAttr().asArrayRef()[idx];
+
+      Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
+      Value one = arith::ConstantIndexOp::create(rewriter, loc, 1);
+      Value tc = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
+                                    loopOp.getUpperbound()[idx],
+                                    loopOp.getStep()[idx], inclusiveUpperbound);
+      lowerBounds.push_back(zero);
+      upperBounds.push_back(tc);
+      steps.push_back(one);
+    } else {
+      Value newLB = getValueOrCreateCastToIndexLike(
+          rewriter, loc, indexType, loopOp.getLowerbound()[idx]);
+      Value newUB = getExclusiveUpperBoundAsIndex(loopOp, idx, rewriter);
+      Value newStep = getValueOrCreateCastToIndexLike(rewriter, loc, indexType,
+                                                      loopOp.getStep()[idx]);
+      lowerBounds.push_back(newLB);
+      upperBounds.push_back(newUB);
+      steps.push_back(newStep);
+    }
   }
 
-  // Now create the nested ForOps using normalized bounds
+  // Now create the nested ForOps using the pre-computed bounds
   for (BlockArgument iv : loopOp.getBody().getArguments()) {
     size_t idx = iv.getArgNumber();
 
@@ -245,8 +276,8 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
     if (idx > 0)
       rewriter.setInsertionPointToStart(forOps.back().getBody());
 
-    scf::ForOp forOp =
-        scf::ForOp::create(rewriter, loc, zero, tripCounts[idx], one);
+    scf::ForOp forOp = scf::ForOp::create(rewriter, loc, lowerBounds[idx],
+                                          upperBounds[idx], steps[idx]);
     forOps.push_back(forOp);
     mapping.map(iv, forOp.getInductionVar());
   }
@@ -264,8 +295,10 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
   cloneACCRegionIntoForLoop(&loopOp.getRegion(), forOps.back().getBody(),
                             rewriter.getInsertionPoint(), mapping, rewriter);
 
-  // Denormalize IV uses: original_iv = normalized_iv * orig_step + orig_lb
+  // Denormalize IV uses for normalized loops only
   for (size_t idx = 0; idx < forOps.size(); ++idx) {
+    if (!needsNormalize[idx])
+      continue;
     Value iv = forOps[idx].getInductionVar();
     if (!iv.use_empty()) {
       rewriter.setInsertionPointToStart(forOps[idx].getBody());
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
index e54a4f5bb4f1f..0925864f970f5 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
@@ -242,17 +242,18 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopWithNonConstantBounds) {
 
   ASSERT_TRUE(forOp);
 
-  // Normalized: lb=0, step=1, ub=tripCount (computed from dynamic bounds)
-  auto lbConst = getConstantIndex(forOp.getLowerBound());
-  ASSERT_TRUE(lbConst.has_value());
-  EXPECT_EQ(*lbConst, 0);
-
-  auto stepConst = getConstantIndex(forOp.getStep());
-  ASSERT_TRUE(stepConst.has_value());
-  EXPECT_EQ(*stepConst, 1);
-
-  // Upper bound is a computed trip count (not a constant)
-  EXPECT_FALSE(getConstantIndex(forOp.getUpperBound()).has_value());
+  // Positive step: bounds passed through directly (no normalization)
+  EXPECT_EQ(forOp.getLowerBound(), lb);
+
+  // Upper bound should be ub + 1 (for inclusive -> exclusive conversion)
+  auto ubAddOp = forOp.getUpperBound().getDefiningOp<arith::AddIOp>();
+  ASSERT_TRUE(ubAddOp);
+  EXPECT_EQ(ubAddOp.getLhs(), ub);
+  auto oneConst = getConstantIndex(ubAddOp.getRhs());
+  ASSERT_TRUE(oneConst.has_value());
+  EXPECT_EQ(*oneConst, 1);
+
+  EXPECT_EQ(forOp.getStep(), step);
 }
 
 TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForWithCollapse) {
@@ -349,19 +350,10 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForExclusiveUpperBound) {
 
   ASSERT_TRUE(forOp);
 
-  // Normalized: lb=0, step=1, ub=tripCount
-  // For exclusive [0, 10) with step 1: tripCount = (9 - 0 + 1) / 1 = 10
-  auto lbConst = getConstantIndex(forOp.getLowerBound());
-  ASSERT_TRUE(lbConst.has_value());
-  EXPECT_EQ(*lbConst, 0);
-
-  auto ubConst = getConstantIndex(forOp.getUpperBound());
-  ASSERT_TRUE(ubConst.has_value());
-  EXPECT_EQ(*ubConst, 10);
-
-  auto stepConst = getConstantIndex(forOp.getStep());
-  ASSERT_TRUE(stepConst.has_value());
-  EXPECT_EQ(*stepConst, 1);
+  // Positive step: bounds passed through directly (no normalization)
+  EXPECT_EQ(forOp.getLowerBound(), c0);
+  EXPECT_EQ(forOp.getUpperBound(), c10);
+  EXPECT_EQ(forOp.getStep(), c1);
 }
 
 TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForNegativeStep) {

>From 456cd80c276667c88e241dfa06383affe9e21b27 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Mon, 9 Mar 2026 10:54:51 -0700
Subject: [PATCH 3/3] [mlir][OpenACC] Unconditionally normalize all loops in
 convertACCLoopToSCFFor

Normalize unconditionally (lb=0, step=1, ub=tripCount) to match
convertACCLoopToSCFParallel. Remove isNegativeConstantIndex and
getExclusiveUpperBoundAsIndex which are no longer needed.

Address review feedback: let later passes fold constants rather
than checking for negative steps explicitly.

Made-with: Cursor
---
 .../OpenACC/Utils/OpenACCUtilsLoop.cpp        | 107 ++++--------------
 .../Dialect/OpenACC/OpenACCUtilsLoopTest.cpp  |  41 ++++---
 2 files changed, 45 insertions(+), 103 deletions(-)

diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
index 94fc9b8de39ef..d231e1f6794ce 100644
--- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
+++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
@@ -50,28 +50,6 @@ static Value calculateTripCount(OpBuilder &b, Location loc, Value lb, Value ub,
   return b.createOrFold<arith::DivSIOp>(loc, add, step);
 }
 
-/// Get exclusive upper bound from acc.loop (add 1 if inclusive).
-/// The result is always in index type.
-static Value getExclusiveUpperBoundAsIndex(acc::LoopOp loopOp, size_t ivPos,
-                                           OpBuilder &b) {
-  bool isInclusive = false;
-  if (loopOp.getInclusiveUpperbound().has_value())
-    isInclusive = loopOp.getInclusiveUpperboundAttr().asArrayRef()[ivPos];
-
-  Value origUB = loopOp.getUpperbound()[ivPos];
-  Location loc = origUB.getLoc();
-  Type indexType = b.getIndexType();
-
-  // Cast to index first, then add if inclusive
-  Value ub = getValueOrCreateCastToIndexLike(b, loc, indexType, origUB);
-  if (isInclusive) {
-    Value one = arith::ConstantIndexOp::create(b, loc, 1);
-    ub = b.createOrFold<arith::AddIOp>(loc, ub, one,
-                                       arith::IntegerOverflowFlags::nsw);
-  }
-  return ub;
-}
-
 /// Handle differing types between SCF (index) and ACC loops.
 /// Creates casts from the new SCF IVs to the original ACC IV types and updates
 /// the mapping. The newIVs should correspond 1:1 with the ACC loop's IVs.
@@ -201,16 +179,6 @@ wrapMultiBlockRegionWithSCFExecuteRegion(Region &region, IRMapping &mapping,
   return exeRegionOp;
 }
 
-/// Return true if \p v is a constant index with a negative value.
-static bool isNegativeConstantIndex(Value v) {
-  if (auto cst = v.getDefiningOp<arith::ConstantIndexOp>())
-    return cst.value() < 0;
-  if (auto cst = v.getDefiningOp<arith::ConstantOp>())
-    if (auto intAttr = dyn_cast<IntegerAttr>(cst.getValue()))
-      return intAttr.getInt() < 0;
-  return false;
-}
-
 scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
                                   bool enableCollapse) {
   assert(!loopOp.getUnstructured() &&
@@ -218,87 +186,55 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
          "loops");
 
   Location loc = loopOp->getLoc();
-  Type indexType = rewriter.getIndexType();
 
-  // Create nested scf.for loops and build IR mapping for IVs
   IRMapping mapping;
   SmallVector<scf::ForOp> forOps;
 
-  // Save the original insertion point
   OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPoint(loopOp);
 
-  // Determine which loops need normalization (negative constant step).
-  // scf.for requires a positive step, so loops with a negative step must be
-  // normalized to lb=0, step=1, ub=tripCount with IV denormalization.
-  SmallVector<bool> needsNormalize;
-  for (BlockArgument iv : loopOp.getBody().getArguments()) {
-    size_t idx = iv.getArgNumber();
-    needsNormalize.push_back(isNegativeConstantIndex(loopOp.getStep()[idx]));
-  }
+  // Normalize all loops: lb=0, step=1, ub=tripCount.
+  // scf.for requires a positive step, but acc.loop may have arbitrary steps
+  // (including negative). Normalizing unconditionally keeps this consistent
+  // with convertACCLoopToSCFParallel and lets later passes fold constants.
+  Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
+  Value one = arith::ConstantIndexOp::create(rewriter, loc, 1);
 
-  // Pre-compute all loop bounds before creating any ForOp.
-  SmallVector<Value> lowerBounds, upperBounds, steps;
-  for (BlockArgument iv : loopOp.getBody().getArguments()) {
-    size_t idx = iv.getArgNumber();
-
-    if (needsNormalize[idx]) {
-      bool inclusiveUpperbound = false;
-      if (loopOp.getInclusiveUpperbound().has_value())
-        inclusiveUpperbound =
-            loopOp.getInclusiveUpperboundAttr().asArrayRef()[idx];
-
-      Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
-      Value one = arith::ConstantIndexOp::create(rewriter, loc, 1);
-      Value tc = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
-                                    loopOp.getUpperbound()[idx],
-                                    loopOp.getStep()[idx], inclusiveUpperbound);
-      lowerBounds.push_back(zero);
-      upperBounds.push_back(tc);
-      steps.push_back(one);
-    } else {
-      Value newLB = getValueOrCreateCastToIndexLike(
-          rewriter, loc, indexType, loopOp.getLowerbound()[idx]);
-      Value newUB = getExclusiveUpperBoundAsIndex(loopOp, idx, rewriter);
-      Value newStep = getValueOrCreateCastToIndexLike(rewriter, loc, indexType,
-                                                      loopOp.getStep()[idx]);
-      lowerBounds.push_back(newLB);
-      upperBounds.push_back(newUB);
-      steps.push_back(newStep);
-    }
-  }
+  SmallVector<Value> tripCounts;
+  for (auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
+    bool inclusiveUpperbound = false;
+    if (loopOp.getInclusiveUpperbound().has_value())
+      inclusiveUpperbound =
+          loopOp.getInclusiveUpperboundAttr().asArrayRef()[idx];
 
-  // Now create the nested ForOps using the pre-computed bounds
-  for (BlockArgument iv : loopOp.getBody().getArguments()) {
-    size_t idx = iv.getArgNumber();
+    Value tc = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
+                                  loopOp.getUpperbound()[idx],
+                                  loopOp.getStep()[idx], inclusiveUpperbound);
+    tripCounts.push_back(tc);
+  }
 
-    // For nested loops, insert inside the previous loop's body
+  for (auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
     if (idx > 0)
       rewriter.setInsertionPointToStart(forOps.back().getBody());
 
-    scf::ForOp forOp = scf::ForOp::create(rewriter, loc, lowerBounds[idx],
-                                          upperBounds[idx], steps[idx]);
+    scf::ForOp forOp =
+        scf::ForOp::create(rewriter, loc, zero, tripCounts[idx], one);
     forOps.push_back(forOp);
     mapping.map(iv, forOp.getInductionVar());
   }
 
-  // Set insertion point inside the innermost loop for IV casts and body cloning
   rewriter.setInsertionPointToStart(forOps.back().getBody());
 
-  // Handle IV type conversion (index -> original type)
   SmallVector<Value> scfIVs;
   for (scf::ForOp forOp : forOps)
     scfIVs.push_back(forOp.getInductionVar());
   mapACCLoopIVsToSCFIVs(loopOp, scfIVs, rewriter, mapping);
 
-  // Clone the loop body into the innermost scf.for
   cloneACCRegionIntoForLoop(&loopOp.getRegion(), forOps.back().getBody(),
                             rewriter.getInsertionPoint(), mapping, rewriter);
 
-  // Denormalize IV uses for normalized loops only
+  // Denormalize IV uses: original_iv = normalized_iv * orig_step + orig_lb
   for (size_t idx = 0; idx < forOps.size(); ++idx) {
-    if (!needsNormalize[idx])
-      continue;
     Value iv = forOps[idx].getInductionVar();
     if (!iv.use_empty()) {
       rewriter.setInsertionPointToStart(forOps[idx].getBody());
@@ -307,7 +243,6 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
     }
   }
 
-  // Optionally collapse nested loops
   if (enableCollapse && forOps.size() > 1)
     if (failed(coalesceLoops(rewriter, forOps)))
       loopOp.emitError("failed to collapse acc.loop");
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
index 0925864f970f5..6a80b8987ac27 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
@@ -242,18 +242,16 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopWithNonConstantBounds) {
 
   ASSERT_TRUE(forOp);
 
-  // Positive step: bounds passed through directly (no normalization)
-  EXPECT_EQ(forOp.getLowerBound(), lb);
-
-  // Upper bound should be ub + 1 (for inclusive -> exclusive conversion)
-  auto ubAddOp = forOp.getUpperBound().getDefiningOp<arith::AddIOp>();
-  ASSERT_TRUE(ubAddOp);
-  EXPECT_EQ(ubAddOp.getLhs(), ub);
-  auto oneConst = getConstantIndex(ubAddOp.getRhs());
-  ASSERT_TRUE(oneConst.has_value());
-  EXPECT_EQ(*oneConst, 1);
-
-  EXPECT_EQ(forOp.getStep(), step);
+  // Normalized: lb=0, step=1, ub=tripCount (computed from dynamic bounds)
+  auto lbConst = getConstantIndex(forOp.getLowerBound());
+  ASSERT_TRUE(lbConst.has_value());
+  EXPECT_EQ(*lbConst, 0);
+
+  auto stepConst = getConstantIndex(forOp.getStep());
+  ASSERT_TRUE(stepConst.has_value());
+  EXPECT_EQ(*stepConst, 1);
+
+  EXPECT_FALSE(getConstantIndex(forOp.getUpperBound()).has_value());
 }
 
 TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForWithCollapse) {
@@ -350,10 +348,19 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForExclusiveUpperBound) {
 
   ASSERT_TRUE(forOp);
 
-  // Positive step: bounds passed through directly (no normalization)
-  EXPECT_EQ(forOp.getLowerBound(), c0);
-  EXPECT_EQ(forOp.getUpperBound(), c10);
-  EXPECT_EQ(forOp.getStep(), c1);
+  // Normalized: lb=0, step=1, ub=tripCount
+  // For exclusive [0, 10) with step 1: tripCount = (9 - 0 + 1) / 1 = 10
+  auto lbConst = getConstantIndex(forOp.getLowerBound());
+  ASSERT_TRUE(lbConst.has_value());
+  EXPECT_EQ(*lbConst, 0);
+
+  auto ubConst = getConstantIndex(forOp.getUpperBound());
+  ASSERT_TRUE(ubConst.has_value());
+  EXPECT_EQ(*ubConst, 10);
+
+  auto stepConst = getConstantIndex(forOp.getStep());
+  ASSERT_TRUE(stepConst.has_value());
+  EXPECT_EQ(*stepConst, 1);
 }
 
 TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForNegativeStep) {
@@ -363,7 +370,7 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForNegativeStep) {
   Value c1 = createIndexConstant(1);
   Value cNeg1 = createIndexConstant(-1);
 
-  // acc.loop from 10 to 1 step -1 (inclusive), like Fortran DO k = 10, 1, -1
+  // acc.loop from 10 to 1 step -1 (inclusive)
   acc::LoopOp loopOp = createLoopOp({c10}, {c1}, {cNeg1});
   scf::ForOp forOp =
       convertACCLoopToSCFFor(loopOp, b, /*enableCollapse=*/false);



More information about the Mlir-commits mailing list