[flang-commits] [flang] [flang][acc] Fix crash on collapse(force:N) with non-tightly nested loops (PR #191310)
via flang-commits
flang-commits at lists.llvm.org
Tue Apr 14 17:54:35 PDT 2026
https://github.com/khaki3 updated https://github.com/llvm/llvm-project/pull/191310
>From d5f542d5c4360c2c4bdaa67193a7f9af68e1908c Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Tue, 14 Apr 2026 17:53:38 -0700
Subject: [PATCH] [Flang][OpenACC] Fix collapse(force:N) with non-tightly
nested loops
When collapse(force:N) is applied to non-tightly nested loops, the
compiler could crash or generate redundant inner loops.
Crashes occurred because getNestedEvaluations() was called without
checking hasNestedEvaluations() first. Add guards in hasEarlyReturn(),
createRegionOp(), and the collapse-force sinking logic in Bridge.cpp.
Redundant inner loops were generated because processDoLoopBounds
absorbed N levels of do-loops into the outer acc.loop, but the PFT
walker still generated separate acc.loop ops for those same loops.
Fix by tracking absorbed DoConstruct* pointers in visitLoopControl
and skipping them in genFIR(DoConstruct).
Made-with: Cursor
---
flang/include/flang/Lower/OpenACC.h | 10 ++++++
flang/lib/Lower/Bridge.cpp | 36 +++++++++++++------
flang/lib/Lower/OpenACC.cpp | 23 +++++++++++-
.../acc-loop-collapse-force-lowering.f90 | 15 +++-----
...loop-collapse-force-non-tightly-nested.f90 | 28 +++++++++++++++
5 files changed, 90 insertions(+), 22 deletions(-)
create mode 100644 flang/test/Lower/OpenACC/acc-loop-collapse-force-non-tightly-nested.f90
diff --git a/flang/include/flang/Lower/OpenACC.h b/flang/include/flang/Lower/OpenACC.h
index 745c8686457f4..02b4331a1f64e 100644
--- a/flang/include/flang/Lower/OpenACC.h
+++ b/flang/include/flang/Lower/OpenACC.h
@@ -110,6 +110,16 @@ getCollapseSizeAndForce(const Fortran::parser::AccClauseList &);
/// Checks whether the current insertion point is inside OpenACC loop.
bool isInOpenACCLoop(fir::FirOpBuilder &);
+/// Record a DoConstruct as having been absorbed by a collapse clause.
+/// The PFT walker should skip generating a loop for it.
+void markDoConstructAsCollapsed(const Fortran::parser::DoConstruct &);
+
+/// Check whether a DoConstruct was absorbed by a collapse clause.
+bool isCollapsedDoConstruct(const Fortran::parser::DoConstruct &);
+
+/// Clear the collapsed DoConstruct tracking set.
+void clearCollapsedDoConstructs();
+
/// Checks whether the current insertion point is inside OpenACC compute
/// construct.
bool isInsideOpenACCComputeConstruct(fir::FirOpBuilder &);
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 19ab789018f40..f55db18265ba5 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2501,6 +2501,16 @@ class FirConverter : public Fortran::lower::AbstractConverter {
Fortran::lower::pft::Evaluation &eval = getEval();
bool unstructuredContext = eval.lowerAsUnstructured();
+ // If this do-loop was absorbed by a collapse clause on a parent acc.loop,
+ // skip generating any loop — just lower the body. The IV value is
+ // already available from the parent acc.loop's block argument.
+ if (Fortran::lower::isCollapsedDoConstruct(doConstruct)) {
+ auto iter = eval.getNestedEvaluations().begin();
+ for (auto end = --eval.getNestedEvaluations().end(); iter != end; ++iter)
+ genFIR(*iter, unstructuredContext);
+ return;
+ }
+
// Loops with induction variables inside OpenACC compute constructs
// need special handling to ensure that the IVs are privatized.
if (Fortran::lower::isInsideOpenACCComputeConstruct(*builder)) {
@@ -3591,6 +3601,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
}
void genFIR(const Fortran::parser::OpenACCConstruct &acc) {
+ Fortran::lower::clearCollapsedDoConstructs();
mlir::OpBuilder::InsertPoint insertPt = builder->saveInsertionPoint();
// Cache constructs should not push/pop a scope because they need to update
@@ -3649,8 +3660,11 @@ class FirConverter : public Fortran::lower::AbstractConverter {
if (curEval->lowerAsStructured()) {
curEval = &curEval->getFirstNestedEvaluation();
- for (uint64_t i = 1; i < loopCount; i++)
+ for (uint64_t i = 1; i < loopCount; i++) {
+ if (!curEval->hasNestedEvaluations())
+ break;
curEval = &*std::next(curEval->getNestedEvaluations().begin());
+ }
}
}
@@ -3677,9 +3691,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
}
prologue.push_back(&*it);
}
- // Semantics guarantees collapseDepth does not exceed nest depth
- // so childLoop must be found here.
- assert(childLoop && "Expected inner DoConstruct for collapse");
+ if (!childLoop)
+ break;
parent = childLoop;
innermostLoopEval = childLoop;
}
@@ -3700,16 +3713,19 @@ class FirConverter : public Fortran::lower::AbstractConverter {
sink(prologue);
// Lower innermost loop body, skipping sunk
- for (Fortran::lower::pft::Evaluation &e :
- innermostLoopEval->getNestedEvaluations())
- if (!sunk.contains(&e))
- genFIR(e);
+ if (innermostLoopEval && innermostLoopEval->hasNestedEvaluations())
+ for (Fortran::lower::pft::Evaluation &e :
+ innermostLoopEval->getNestedEvaluations())
+ if (!sunk.contains(&e))
+ genFIR(e);
sink(epilogue);
} else {
// Normal lowering
- for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations())
- genFIR(e);
+ if (curEval->hasNestedEvaluations())
+ for (Fortran::lower::pft::Evaluation &e :
+ curEval->getNestedEvaluations())
+ genFIR(e);
}
if (!isCacheConstruct)
localSymbols.popScope();
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 5a7fe899b372f..1eb9dd7cf22f2 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1215,7 +1215,8 @@ createRegionOp(fir::FirOpBuilder &builder, mlir::Location loc,
// If it is an unstructured region and is not the outer region of a combined
// construct, create empty blocks for all evaluations.
- if (eval.lowerAsUnstructured() && !outerCombined)
+ if (eval.lowerAsUnstructured() && !outerCombined &&
+ eval.hasNestedEvaluations())
Fortran::lower::createEmptyRegionBlocks<mlir::acc::TerminatorOp,
mlir::acc::YieldOp>(
builder, eval.getNestedEvaluations());
@@ -1544,6 +1545,7 @@ static void visitLoopControl(
if (!innerDo)
break; // No deeper loop; stop collecting collapsed bounds.
+ Fortran::lower::markDoConstructAsCollapsed(*innerDo);
loopControl = &*innerDo->GetLoopControl();
mlir::Location loc =
converter.genLocation(Fortran::parser::FindSourceLocation(*innerDo));
@@ -2033,6 +2035,8 @@ buildACCLoopOp(Fortran::lower::AbstractConverter &converter,
}
static bool hasEarlyReturn(Fortran::lower::pft::Evaluation &eval) {
+ if (!eval.hasNestedEvaluations())
+ return false;
bool hasReturnStmt = false;
for (auto &e : eval.getNestedEvaluations()) {
e.visit(Fortran::common::visitors{
@@ -4684,6 +4688,23 @@ bool Fortran::lower::isInOpenACCLoop(fir::FirOpBuilder &builder) {
return false;
}
+static llvm::SmallPtrSet<const Fortran::parser::DoConstruct *, 8>
+ collapsedDoConstructs;
+
+void Fortran::lower::markDoConstructAsCollapsed(
+ const Fortran::parser::DoConstruct &dc) {
+ collapsedDoConstructs.insert(&dc);
+}
+
+bool Fortran::lower::isCollapsedDoConstruct(
+ const Fortran::parser::DoConstruct &dc) {
+ return collapsedDoConstructs.contains(&dc);
+}
+
+void Fortran::lower::clearCollapsedDoConstructs() {
+ collapsedDoConstructs.clear();
+}
+
bool Fortran::lower::isInsideOpenACCComputeConstruct(
fir::FirOpBuilder &builder) {
return mlir::isa_and_nonnull<ACC_COMPUTE_CONSTRUCT_OPS>(
diff --git a/flang/test/Lower/OpenACC/acc-loop-collapse-force-lowering.f90 b/flang/test/Lower/OpenACC/acc-loop-collapse-force-lowering.f90
index ca932c1b159ba..0850eb5b696e8 100644
--- a/flang/test/Lower/OpenACC/acc-loop-collapse-force-lowering.f90
+++ b/flang/test/Lower/OpenACC/acc-loop-collapse-force-lowering.f90
@@ -1,7 +1,7 @@
! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
-! Verify collapse(force:2) sinks prologue (between loops) and epilogue (after inner loop)
-! into the acc.loop region body.
+! Verify collapse(force:2) sinks prologue/epilogue and eliminates
+! the inner loop (absorbed by the outer collapse).
subroutine collapse_force_sink(n, m)
integer, intent(in) :: n, m
@@ -22,20 +22,13 @@ subroutine collapse_force_sink(n, m)
! CHECK: func.func @_QPcollapse_force_sink(
! CHECK: acc.parallel
-! Ensure outer acc.loop is combined(parallel)
-! CHECK: acc.loop combined(parallel)
-! Prologue: constant 4.2 and an assign before inner loop
+! Only one acc.loop should exist (the outer collapsed one)
+! CHECK-COUNT-1: acc.loop
! CHECK: arith.constant 4.200000e+00
! CHECK: hlfir.assign
-! Inner loop and its body include 2.0 add and an assign
-! CHECK: acc.loop
! CHECK: arith.constant 2.000000e+00
! CHECK: arith.addf
! CHECK: hlfir.assign
-! Epilogue: constant 7.3 and an assign after inner loop
! CHECK: arith.constant 7.300000e+00
! CHECK: hlfir.assign
-! And the outer acc.loop has collapse = [2]
! CHECK: } attributes {collapse = [2]
-
-
diff --git a/flang/test/Lower/OpenACC/acc-loop-collapse-force-non-tightly-nested.f90 b/flang/test/Lower/OpenACC/acc-loop-collapse-force-non-tightly-nested.f90
new file mode 100644
index 0000000000000..5eed4dc5a5c76
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-loop-collapse-force-non-tightly-nested.f90
@@ -0,0 +1,28 @@
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+! Verify that collapse(force:N) on non-tightly nested loops (with code
+! between loops) does not crash the compiler.
+
+subroutine collapse_force_non_tight(a, n)
+ implicit none
+ integer, intent(in) :: n
+ real, intent(inout) :: a(n, n, n)
+ integer :: i, j, k
+ real :: tmp
+
+ !$acc parallel loop collapse(force:3) copy(a)
+ do i = 1, n
+ do j = 1, n
+ tmp = real(i + j)
+ do k = 1, n
+ a(i, j, k) = tmp + real(k)
+ end do
+ end do
+ end do
+ !$acc end parallel loop
+end subroutine
+
+! CHECK: func.func @_QPcollapse_force_non_tight
+! CHECK: acc.parallel
+! CHECK: acc.loop
+! CHECK: collapse = [3]
More information about the flang-commits
mailing list