[Mlir-commits] [mlir] [mlir][OpenACC] Normalize loop bounds in convertACCLoopToSCFFor for negative steps (PR #184935)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Mon Mar 9 10:55:13 PDT 2026
https://github.com/khaki3 updated https://github.com/llvm/llvm-project/pull/184935
>From 24122a8eef48c6e82e0d2beb7be0f31b5e136dee Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Thu, 5 Mar 2026 18:19:06 -0800
Subject: [PATCH 1/3] [mlir][OpenACC] Normalize loop bounds in
convertACCLoopToSCFFor
convertACCLoopToSCFFor was passing acc.loop bounds directly to scf.for,
which can produce an scf.for with a negative step when the source is a
Fortran DO loop counting down (e.g. DO k = n, 1, -1). scf.for requires
a positive step, so this led to invalid IR and downstream crashes.
Normalize all loops to lb=0, step=1, ub=tripCount and denormalize IV
uses inside the body, matching what convertACCLoopToSCFParallel already
does.
Made-with: Cursor
---
.../OpenACC/Utils/OpenACCUtilsLoop.cpp | 45 +++++++----
.../Dialect/OpenACC/OpenACCUtilsLoopTest.cpp | 75 ++++++++++++++-----
2 files changed, 85 insertions(+), 35 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
index de4f0a26a0f11..9325478642040 100644
--- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
+++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
@@ -208,7 +208,6 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
"loops");
Location loc = loopOp->getLoc();
- Type indexType = rewriter.getIndexType();
// Create nested scf.for loops and build IR mapping for IVs
IRMapping mapping;
@@ -218,23 +217,27 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
OpBuilder::InsertionGuard guard(rewriter);
rewriter.setInsertionPoint(loopOp);
- // First, compute ALL loop bounds at the current insertion point (before
- // any ForOp). This ensures all bounds are defined in the outer scope,
- // which is required for coalesceLoops to work correctly.
- SmallVector<Value> lowerBounds, upperBounds, steps;
+ // Normalize all loops: lb=0, step=1, ub=tripCount.
+ // This is required because scf.for mandates a positive step, but acc.loop
+ // may have a negative step (e.g. Fortran DO loops counting down).
+ Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
+ Value one = arith::ConstantIndexOp::create(rewriter, loc, 1);
+
+ SmallVector<Value> tripCounts;
for (BlockArgument iv : loopOp.getBody().getArguments()) {
size_t idx = iv.getArgNumber();
- Value newLowerBound = getValueOrCreateCastToIndexLike(
- rewriter, loc, indexType, loopOp.getLowerbound()[idx]);
- Value newUpperBound = getExclusiveUpperBoundAsIndex(loopOp, idx, rewriter);
- Value newStep = getValueOrCreateCastToIndexLike(rewriter, loc, indexType,
- loopOp.getStep()[idx]);
- lowerBounds.push_back(newLowerBound);
- upperBounds.push_back(newUpperBound);
- steps.push_back(newStep);
+ bool inclusiveUpperbound = false;
+ if (loopOp.getInclusiveUpperbound().has_value())
+ inclusiveUpperbound =
+ loopOp.getInclusiveUpperboundAttr().asArrayRef()[idx];
+
+ Value tc = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
+ loopOp.getUpperbound()[idx],
+ loopOp.getStep()[idx], inclusiveUpperbound);
+ tripCounts.push_back(tc);
}
- // Now create the nested ForOps using the pre-computed bounds
+ // Now create the nested ForOps using normalized bounds
for (BlockArgument iv : loopOp.getBody().getArguments()) {
size_t idx = iv.getArgNumber();
@@ -242,8 +245,8 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
if (idx > 0)
rewriter.setInsertionPointToStart(forOps.back().getBody());
- scf::ForOp forOp = scf::ForOp::create(rewriter, loc, lowerBounds[idx],
- upperBounds[idx], steps[idx]);
+ scf::ForOp forOp =
+ scf::ForOp::create(rewriter, loc, zero, tripCounts[idx], one);
forOps.push_back(forOp);
mapping.map(iv, forOp.getInductionVar());
}
@@ -261,6 +264,16 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
cloneACCRegionIntoForLoop(&loopOp.getRegion(), forOps.back().getBody(),
rewriter.getInsertionPoint(), mapping, rewriter);
+ // Denormalize IV uses: original_iv = normalized_iv * orig_step + orig_lb
+ for (size_t idx = 0; idx < forOps.size(); ++idx) {
+ Value iv = forOps[idx].getInductionVar();
+ if (!iv.use_empty()) {
+ rewriter.setInsertionPointToStart(forOps[idx].getBody());
+ normalizeIVUses(rewriter, loc, iv, loopOp.getLowerbound()[idx],
+ loopOp.getStep()[idx]);
+ }
+ }
+
// Optionally collapse nested loops
if (enableCollapse && forOps.size() > 1)
if (failed(coalesceLoops(rewriter, forOps)))
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
index 3b1558e4f4146..e54a4f5bb4f1f 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
@@ -231,10 +231,9 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopWithI32Bounds) {
TEST_F(OpenACCUtilsLoopTest, ConvertLoopWithNonConstantBounds) {
auto [module, funcOp] =
createModuleWithFuncArgs({b.getIndexType(), b.getIndexType()});
- Block &entryBlock = funcOp.getBody().front();
- Value lb = entryBlock.getArgument(0);
- Value ub = entryBlock.getArgument(1);
+ Value lb = funcOp.getArgument(0);
+ Value ub = funcOp.getArgument(1);
Value step = createIndexConstant(1);
acc::LoopOp loopOp = createLoopOp({lb}, {ub}, {step});
@@ -243,20 +242,17 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopWithNonConstantBounds) {
ASSERT_TRUE(forOp);
- // Lower bound should be the function argument (no cast needed for index)
- EXPECT_EQ(forOp.getLowerBound(), lb);
+ // Normalized: lb=0, step=1, ub=tripCount (computed from dynamic bounds)
+ auto lbConst = getConstantIndex(forOp.getLowerBound());
+ ASSERT_TRUE(lbConst.has_value());
+ EXPECT_EQ(*lbConst, 0);
- // Upper bound should be ub + 1 (for inclusive -> exclusive conversion)
- // Check it's an addi of ub and 1
- auto ubAddOp = forOp.getUpperBound().getDefiningOp<arith::AddIOp>();
- ASSERT_TRUE(ubAddOp);
- EXPECT_EQ(ubAddOp.getLhs(), ub);
- auto oneConst = getConstantIndex(ubAddOp.getRhs());
- ASSERT_TRUE(oneConst.has_value());
- EXPECT_EQ(*oneConst, 1);
+ auto stepConst = getConstantIndex(forOp.getStep());
+ ASSERT_TRUE(stepConst.has_value());
+ EXPECT_EQ(*stepConst, 1);
- // Step should be the constant 1
- EXPECT_EQ(forOp.getStep(), step);
+ // Upper bound is a computed trip count (not a constant)
+ EXPECT_FALSE(getConstantIndex(forOp.getUpperBound()).has_value());
}
TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForWithCollapse) {
@@ -353,10 +349,51 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForExclusiveUpperBound) {
ASSERT_TRUE(forOp);
- // With exclusive upper bound, ub should remain 10 (no +1 adjustment)
- EXPECT_EQ(forOp.getLowerBound(), c0);
- EXPECT_EQ(forOp.getUpperBound(), c10);
- EXPECT_EQ(forOp.getStep(), c1);
+ // Normalized: lb=0, step=1, ub=tripCount
+ // For exclusive [0, 10) with step 1: tripCount = (9 - 0 + 1) / 1 = 10
+ auto lbConst = getConstantIndex(forOp.getLowerBound());
+ ASSERT_TRUE(lbConst.has_value());
+ EXPECT_EQ(*lbConst, 0);
+
+ auto ubConst = getConstantIndex(forOp.getUpperBound());
+ ASSERT_TRUE(ubConst.has_value());
+ EXPECT_EQ(*ubConst, 10);
+
+ auto stepConst = getConstantIndex(forOp.getStep());
+ ASSERT_TRUE(stepConst.has_value());
+ EXPECT_EQ(*stepConst, 1);
+}
+
+TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForNegativeStep) {
+ auto [module, funcOp] = createModuleWithFunc();
+
+ Value c10 = createIndexConstant(10);
+ Value c1 = createIndexConstant(1);
+ Value cNeg1 = createIndexConstant(-1);
+
+ // acc.loop from 10 to 1 step -1 (inclusive), like Fortran DO k = 10, 1, -1
+ acc::LoopOp loopOp = createLoopOp({c10}, {c1}, {cNeg1});
+ scf::ForOp forOp =
+ convertACCLoopToSCFFor(loopOp, b, /*enableCollapse=*/false);
+
+ ASSERT_TRUE(forOp);
+
+ // Normalized: lb=0, step=1, ub=tripCount
+ // tripCount = (1 - 10 + (-1)) / (-1) = (-10) / (-1) = 10
+ auto lbConst = getConstantIndex(forOp.getLowerBound());
+ ASSERT_TRUE(lbConst.has_value());
+ EXPECT_EQ(*lbConst, 0);
+
+ auto ubConst = getConstantIndex(forOp.getUpperBound());
+ ASSERT_TRUE(ubConst.has_value());
+ EXPECT_EQ(*ubConst, 10);
+
+ auto stepConst = getConstantIndex(forOp.getStep());
+ ASSERT_TRUE(stepConst.has_value());
+ EXPECT_EQ(*stepConst, 1);
+
+ EXPECT_TRUE(isa<scf::YieldOp>(forOp.getBody()->getTerminator()));
+ EXPECT_TRUE(module->verify().succeeded());
}
//===----------------------------------------------------------------------===//
>From b4ad8533d57de2d359a417af1f42134f2663c45e Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Thu, 5 Mar 2026 18:39:27 -0800
Subject: [PATCH 2/3] [mlir][OpenACC] Normalize loop bounds in
convertACCLoopToSCFFor only for negative steps
convertACCLoopToSCFFor was passing acc.loop bounds directly to scf.for,
which produces an scf.for with a negative step when the source is a
Fortran DO loop counting down (e.g. DO k = n, 1, -1). scf.for requires
a positive step, so this led to invalid IR and downstream crashes.
Only normalize loops with a known negative constant step to lb=0,
step=1, ub=tripCount with IV denormalization. Loops with positive or
dynamic steps keep the original direct-bounds behavior unchanged.
Made-with: Cursor
---
.../OpenACC/Utils/OpenACCUtilsLoop.cpp | 69 ++++++++++++++-----
.../Dialect/OpenACC/OpenACCUtilsLoopTest.cpp | 40 +++++------
2 files changed, 67 insertions(+), 42 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
index 9325478642040..94fc9b8de39ef 100644
--- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
+++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
@@ -201,6 +201,16 @@ wrapMultiBlockRegionWithSCFExecuteRegion(Region ®ion, IRMapping &mapping,
return exeRegionOp;
}
+/// Return true if \p v is a constant index with a negative value.
+static bool isNegativeConstantIndex(Value v) {
+ if (auto cst = v.getDefiningOp<arith::ConstantIndexOp>())
+ return cst.value() < 0;
+ if (auto cst = v.getDefiningOp<arith::ConstantOp>())
+ if (auto intAttr = dyn_cast<IntegerAttr>(cst.getValue()))
+ return intAttr.getInt() < 0;
+ return false;
+}
+
scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
bool enableCollapse) {
assert(!loopOp.getUnstructured() &&
@@ -208,6 +218,7 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
"loops");
Location loc = loopOp->getLoc();
+ Type indexType = rewriter.getIndexType();
// Create nested scf.for loops and build IR mapping for IVs
IRMapping mapping;
@@ -217,27 +228,47 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
OpBuilder::InsertionGuard guard(rewriter);
rewriter.setInsertionPoint(loopOp);
- // Normalize all loops: lb=0, step=1, ub=tripCount.
- // This is required because scf.for mandates a positive step, but acc.loop
- // may have a negative step (e.g. Fortran DO loops counting down).
- Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
- Value one = arith::ConstantIndexOp::create(rewriter, loc, 1);
+ // Determine which loops need normalization (negative constant step).
+ // scf.for requires a positive step, so loops with a negative step must be
+ // normalized to lb=0, step=1, ub=tripCount with IV denormalization.
+ SmallVector<bool> needsNormalize;
+ for (BlockArgument iv : loopOp.getBody().getArguments()) {
+ size_t idx = iv.getArgNumber();
+ needsNormalize.push_back(isNegativeConstantIndex(loopOp.getStep()[idx]));
+ }
- SmallVector<Value> tripCounts;
+ // Pre-compute all loop bounds before creating any ForOp.
+ SmallVector<Value> lowerBounds, upperBounds, steps;
for (BlockArgument iv : loopOp.getBody().getArguments()) {
size_t idx = iv.getArgNumber();
- bool inclusiveUpperbound = false;
- if (loopOp.getInclusiveUpperbound().has_value())
- inclusiveUpperbound =
- loopOp.getInclusiveUpperboundAttr().asArrayRef()[idx];
- Value tc = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
- loopOp.getUpperbound()[idx],
- loopOp.getStep()[idx], inclusiveUpperbound);
- tripCounts.push_back(tc);
+ if (needsNormalize[idx]) {
+ bool inclusiveUpperbound = false;
+ if (loopOp.getInclusiveUpperbound().has_value())
+ inclusiveUpperbound =
+ loopOp.getInclusiveUpperboundAttr().asArrayRef()[idx];
+
+ Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
+ Value one = arith::ConstantIndexOp::create(rewriter, loc, 1);
+ Value tc = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
+ loopOp.getUpperbound()[idx],
+ loopOp.getStep()[idx], inclusiveUpperbound);
+ lowerBounds.push_back(zero);
+ upperBounds.push_back(tc);
+ steps.push_back(one);
+ } else {
+ Value newLB = getValueOrCreateCastToIndexLike(
+ rewriter, loc, indexType, loopOp.getLowerbound()[idx]);
+ Value newUB = getExclusiveUpperBoundAsIndex(loopOp, idx, rewriter);
+ Value newStep = getValueOrCreateCastToIndexLike(rewriter, loc, indexType,
+ loopOp.getStep()[idx]);
+ lowerBounds.push_back(newLB);
+ upperBounds.push_back(newUB);
+ steps.push_back(newStep);
+ }
}
- // Now create the nested ForOps using normalized bounds
+ // Now create the nested ForOps using the pre-computed bounds
for (BlockArgument iv : loopOp.getBody().getArguments()) {
size_t idx = iv.getArgNumber();
@@ -245,8 +276,8 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
if (idx > 0)
rewriter.setInsertionPointToStart(forOps.back().getBody());
- scf::ForOp forOp =
- scf::ForOp::create(rewriter, loc, zero, tripCounts[idx], one);
+ scf::ForOp forOp = scf::ForOp::create(rewriter, loc, lowerBounds[idx],
+ upperBounds[idx], steps[idx]);
forOps.push_back(forOp);
mapping.map(iv, forOp.getInductionVar());
}
@@ -264,8 +295,10 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
cloneACCRegionIntoForLoop(&loopOp.getRegion(), forOps.back().getBody(),
rewriter.getInsertionPoint(), mapping, rewriter);
- // Denormalize IV uses: original_iv = normalized_iv * orig_step + orig_lb
+ // Denormalize IV uses for normalized loops only
for (size_t idx = 0; idx < forOps.size(); ++idx) {
+ if (!needsNormalize[idx])
+ continue;
Value iv = forOps[idx].getInductionVar();
if (!iv.use_empty()) {
rewriter.setInsertionPointToStart(forOps[idx].getBody());
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
index e54a4f5bb4f1f..0925864f970f5 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
@@ -242,17 +242,18 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopWithNonConstantBounds) {
ASSERT_TRUE(forOp);
- // Normalized: lb=0, step=1, ub=tripCount (computed from dynamic bounds)
- auto lbConst = getConstantIndex(forOp.getLowerBound());
- ASSERT_TRUE(lbConst.has_value());
- EXPECT_EQ(*lbConst, 0);
-
- auto stepConst = getConstantIndex(forOp.getStep());
- ASSERT_TRUE(stepConst.has_value());
- EXPECT_EQ(*stepConst, 1);
-
- // Upper bound is a computed trip count (not a constant)
- EXPECT_FALSE(getConstantIndex(forOp.getUpperBound()).has_value());
+ // Positive step: bounds passed through directly (no normalization)
+ EXPECT_EQ(forOp.getLowerBound(), lb);
+
+ // Upper bound should be ub + 1 (for inclusive -> exclusive conversion)
+ auto ubAddOp = forOp.getUpperBound().getDefiningOp<arith::AddIOp>();
+ ASSERT_TRUE(ubAddOp);
+ EXPECT_EQ(ubAddOp.getLhs(), ub);
+ auto oneConst = getConstantIndex(ubAddOp.getRhs());
+ ASSERT_TRUE(oneConst.has_value());
+ EXPECT_EQ(*oneConst, 1);
+
+ EXPECT_EQ(forOp.getStep(), step);
}
TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForWithCollapse) {
@@ -349,19 +350,10 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForExclusiveUpperBound) {
ASSERT_TRUE(forOp);
- // Normalized: lb=0, step=1, ub=tripCount
- // For exclusive [0, 10) with step 1: tripCount = (9 - 0 + 1) / 1 = 10
- auto lbConst = getConstantIndex(forOp.getLowerBound());
- ASSERT_TRUE(lbConst.has_value());
- EXPECT_EQ(*lbConst, 0);
-
- auto ubConst = getConstantIndex(forOp.getUpperBound());
- ASSERT_TRUE(ubConst.has_value());
- EXPECT_EQ(*ubConst, 10);
-
- auto stepConst = getConstantIndex(forOp.getStep());
- ASSERT_TRUE(stepConst.has_value());
- EXPECT_EQ(*stepConst, 1);
+ // Positive step: bounds passed through directly (no normalization)
+ EXPECT_EQ(forOp.getLowerBound(), c0);
+ EXPECT_EQ(forOp.getUpperBound(), c10);
+ EXPECT_EQ(forOp.getStep(), c1);
}
TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForNegativeStep) {
>From 456cd80c276667c88e241dfa06383affe9e21b27 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Mon, 9 Mar 2026 10:54:51 -0700
Subject: [PATCH 3/3] [mlir][OpenACC] Unconditionally normalize all loops in
convertACCLoopToSCFFor
Normalize unconditionally (lb=0, step=1, ub=tripCount) to match
convertACCLoopToSCFParallel. Remove isNegativeConstantIndex and
getExclusiveUpperBoundAsIndex which are no longer needed.
Address review feedback: let later passes fold constants rather
than checking for negative steps explicitly.
Made-with: Cursor
---
.../OpenACC/Utils/OpenACCUtilsLoop.cpp | 107 ++++--------------
.../Dialect/OpenACC/OpenACCUtilsLoopTest.cpp | 41 ++++---
2 files changed, 45 insertions(+), 103 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
index 94fc9b8de39ef..d231e1f6794ce 100644
--- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
+++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
@@ -50,28 +50,6 @@ static Value calculateTripCount(OpBuilder &b, Location loc, Value lb, Value ub,
return b.createOrFold<arith::DivSIOp>(loc, add, step);
}
-/// Get exclusive upper bound from acc.loop (add 1 if inclusive).
-/// The result is always in index type.
-static Value getExclusiveUpperBoundAsIndex(acc::LoopOp loopOp, size_t ivPos,
- OpBuilder &b) {
- bool isInclusive = false;
- if (loopOp.getInclusiveUpperbound().has_value())
- isInclusive = loopOp.getInclusiveUpperboundAttr().asArrayRef()[ivPos];
-
- Value origUB = loopOp.getUpperbound()[ivPos];
- Location loc = origUB.getLoc();
- Type indexType = b.getIndexType();
-
- // Cast to index first, then add if inclusive
- Value ub = getValueOrCreateCastToIndexLike(b, loc, indexType, origUB);
- if (isInclusive) {
- Value one = arith::ConstantIndexOp::create(b, loc, 1);
- ub = b.createOrFold<arith::AddIOp>(loc, ub, one,
- arith::IntegerOverflowFlags::nsw);
- }
- return ub;
-}
-
/// Handle differing types between SCF (index) and ACC loops.
/// Creates casts from the new SCF IVs to the original ACC IV types and updates
/// the mapping. The newIVs should correspond 1:1 with the ACC loop's IVs.
@@ -201,16 +179,6 @@ wrapMultiBlockRegionWithSCFExecuteRegion(Region ®ion, IRMapping &mapping,
return exeRegionOp;
}
-/// Return true if \p v is a constant index with a negative value.
-static bool isNegativeConstantIndex(Value v) {
- if (auto cst = v.getDefiningOp<arith::ConstantIndexOp>())
- return cst.value() < 0;
- if (auto cst = v.getDefiningOp<arith::ConstantOp>())
- if (auto intAttr = dyn_cast<IntegerAttr>(cst.getValue()))
- return intAttr.getInt() < 0;
- return false;
-}
-
scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
bool enableCollapse) {
assert(!loopOp.getUnstructured() &&
@@ -218,87 +186,55 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
"loops");
Location loc = loopOp->getLoc();
- Type indexType = rewriter.getIndexType();
- // Create nested scf.for loops and build IR mapping for IVs
IRMapping mapping;
SmallVector<scf::ForOp> forOps;
- // Save the original insertion point
OpBuilder::InsertionGuard guard(rewriter);
rewriter.setInsertionPoint(loopOp);
- // Determine which loops need normalization (negative constant step).
- // scf.for requires a positive step, so loops with a negative step must be
- // normalized to lb=0, step=1, ub=tripCount with IV denormalization.
- SmallVector<bool> needsNormalize;
- for (BlockArgument iv : loopOp.getBody().getArguments()) {
- size_t idx = iv.getArgNumber();
- needsNormalize.push_back(isNegativeConstantIndex(loopOp.getStep()[idx]));
- }
+ // Normalize all loops: lb=0, step=1, ub=tripCount.
+ // scf.for requires a positive step, but acc.loop may have arbitrary steps
+ // (including negative). Normalizing unconditionally keeps this consistent
+ // with convertACCLoopToSCFParallel and lets later passes fold constants.
+ Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
+ Value one = arith::ConstantIndexOp::create(rewriter, loc, 1);
- // Pre-compute all loop bounds before creating any ForOp.
- SmallVector<Value> lowerBounds, upperBounds, steps;
- for (BlockArgument iv : loopOp.getBody().getArguments()) {
- size_t idx = iv.getArgNumber();
-
- if (needsNormalize[idx]) {
- bool inclusiveUpperbound = false;
- if (loopOp.getInclusiveUpperbound().has_value())
- inclusiveUpperbound =
- loopOp.getInclusiveUpperboundAttr().asArrayRef()[idx];
-
- Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
- Value one = arith::ConstantIndexOp::create(rewriter, loc, 1);
- Value tc = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
- loopOp.getUpperbound()[idx],
- loopOp.getStep()[idx], inclusiveUpperbound);
- lowerBounds.push_back(zero);
- upperBounds.push_back(tc);
- steps.push_back(one);
- } else {
- Value newLB = getValueOrCreateCastToIndexLike(
- rewriter, loc, indexType, loopOp.getLowerbound()[idx]);
- Value newUB = getExclusiveUpperBoundAsIndex(loopOp, idx, rewriter);
- Value newStep = getValueOrCreateCastToIndexLike(rewriter, loc, indexType,
- loopOp.getStep()[idx]);
- lowerBounds.push_back(newLB);
- upperBounds.push_back(newUB);
- steps.push_back(newStep);
- }
- }
+ SmallVector<Value> tripCounts;
+ for (auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
+ bool inclusiveUpperbound = false;
+ if (loopOp.getInclusiveUpperbound().has_value())
+ inclusiveUpperbound =
+ loopOp.getInclusiveUpperboundAttr().asArrayRef()[idx];
- // Now create the nested ForOps using the pre-computed bounds
- for (BlockArgument iv : loopOp.getBody().getArguments()) {
- size_t idx = iv.getArgNumber();
+ Value tc = calculateTripCount(rewriter, loc, loopOp.getLowerbound()[idx],
+ loopOp.getUpperbound()[idx],
+ loopOp.getStep()[idx], inclusiveUpperbound);
+ tripCounts.push_back(tc);
+ }
- // For nested loops, insert inside the previous loop's body
+ for (auto [idx, iv] : llvm::enumerate(loopOp.getBody().getArguments())) {
if (idx > 0)
rewriter.setInsertionPointToStart(forOps.back().getBody());
- scf::ForOp forOp = scf::ForOp::create(rewriter, loc, lowerBounds[idx],
- upperBounds[idx], steps[idx]);
+ scf::ForOp forOp =
+ scf::ForOp::create(rewriter, loc, zero, tripCounts[idx], one);
forOps.push_back(forOp);
mapping.map(iv, forOp.getInductionVar());
}
- // Set insertion point inside the innermost loop for IV casts and body cloning
rewriter.setInsertionPointToStart(forOps.back().getBody());
- // Handle IV type conversion (index -> original type)
SmallVector<Value> scfIVs;
for (scf::ForOp forOp : forOps)
scfIVs.push_back(forOp.getInductionVar());
mapACCLoopIVsToSCFIVs(loopOp, scfIVs, rewriter, mapping);
- // Clone the loop body into the innermost scf.for
cloneACCRegionIntoForLoop(&loopOp.getRegion(), forOps.back().getBody(),
rewriter.getInsertionPoint(), mapping, rewriter);
- // Denormalize IV uses for normalized loops only
+ // Denormalize IV uses: original_iv = normalized_iv * orig_step + orig_lb
for (size_t idx = 0; idx < forOps.size(); ++idx) {
- if (!needsNormalize[idx])
- continue;
Value iv = forOps[idx].getInductionVar();
if (!iv.use_empty()) {
rewriter.setInsertionPointToStart(forOps[idx].getBody());
@@ -307,7 +243,6 @@ scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, RewriterBase &rewriter,
}
}
- // Optionally collapse nested loops
if (enableCollapse && forOps.size() > 1)
if (failed(coalesceLoops(rewriter, forOps)))
loopOp.emitError("failed to collapse acc.loop");
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
index 0925864f970f5..6a80b8987ac27 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
@@ -242,18 +242,16 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopWithNonConstantBounds) {
ASSERT_TRUE(forOp);
- // Positive step: bounds passed through directly (no normalization)
- EXPECT_EQ(forOp.getLowerBound(), lb);
-
- // Upper bound should be ub + 1 (for inclusive -> exclusive conversion)
- auto ubAddOp = forOp.getUpperBound().getDefiningOp<arith::AddIOp>();
- ASSERT_TRUE(ubAddOp);
- EXPECT_EQ(ubAddOp.getLhs(), ub);
- auto oneConst = getConstantIndex(ubAddOp.getRhs());
- ASSERT_TRUE(oneConst.has_value());
- EXPECT_EQ(*oneConst, 1);
-
- EXPECT_EQ(forOp.getStep(), step);
+ // Normalized: lb=0, step=1, ub=tripCount (computed from dynamic bounds)
+ auto lbConst = getConstantIndex(forOp.getLowerBound());
+ ASSERT_TRUE(lbConst.has_value());
+ EXPECT_EQ(*lbConst, 0);
+
+ auto stepConst = getConstantIndex(forOp.getStep());
+ ASSERT_TRUE(stepConst.has_value());
+ EXPECT_EQ(*stepConst, 1);
+
+ EXPECT_FALSE(getConstantIndex(forOp.getUpperBound()).has_value());
}
TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForWithCollapse) {
@@ -350,10 +348,19 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForExclusiveUpperBound) {
ASSERT_TRUE(forOp);
- // Positive step: bounds passed through directly (no normalization)
- EXPECT_EQ(forOp.getLowerBound(), c0);
- EXPECT_EQ(forOp.getUpperBound(), c10);
- EXPECT_EQ(forOp.getStep(), c1);
+ // Normalized: lb=0, step=1, ub=tripCount
+ // For exclusive [0, 10) with step 1: tripCount = (9 - 0 + 1) / 1 = 10
+ auto lbConst = getConstantIndex(forOp.getLowerBound());
+ ASSERT_TRUE(lbConst.has_value());
+ EXPECT_EQ(*lbConst, 0);
+
+ auto ubConst = getConstantIndex(forOp.getUpperBound());
+ ASSERT_TRUE(ubConst.has_value());
+ EXPECT_EQ(*ubConst, 10);
+
+ auto stepConst = getConstantIndex(forOp.getStep());
+ ASSERT_TRUE(stepConst.has_value());
+ EXPECT_EQ(*stepConst, 1);
}
TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForNegativeStep) {
@@ -363,7 +370,7 @@ TEST_F(OpenACCUtilsLoopTest, ConvertLoopToSCFForNegativeStep) {
Value c1 = createIndexConstant(1);
Value cNeg1 = createIndexConstant(-1);
- // acc.loop from 10 to 1 step -1 (inclusive), like Fortran DO k = 10, 1, -1
+ // acc.loop from 10 to 1 step -1 (inclusive)
acc::LoopOp loopOp = createLoopOp({c10}, {c1}, {cNeg1});
scf::ForOp forOp =
convertACCLoopToSCFFor(loopOp, b, /*enableCollapse=*/false);
More information about the Mlir-commits
mailing list