[flang-commits] [flang] 23be203 - [flang][OpenACC] support collapse on unstructured acc.loop (#196174)
via flang-commits
flang-commits at lists.llvm.org
Fri May 8 10:58:55 PDT 2026
Author: Eugene Epshteyn
Date: 2026-05-08T13:58:51-04:00
New Revision: 23be2035d437ed23abb22f4dff1752eb82adb659
URL: https://github.com/llvm/llvm-project/commit/23be2035d437ed23abb22f4dff1752eb82adb659
DIFF: https://github.com/llvm/llvm-project/commit/23be2035d437ed23abb22f4dff1752eb82adb659.diff
LOG: [flang][OpenACC] support collapse on unstructured acc.loop (#196174)
PR #164992 added unstructured-loop support to OpenACC lowering (no
bounds on acc.loop, IVs privatized, body emitted as explicit cf), but it
didn't cover the `collapse(N)` case. Compiling
```
!$acc parallel loop collapse(2)
do j = 1, n
do i = 1, n
if (i == jdiag) then
a(i,j) = 0.0d0
cycle
end if
a(i,j) = real(i + j, 8)
end do
end do
```
asserted in MLIR's runRegionDCE: "Assertion `mightHaveTerminator()'
failed".
Root cause: visitLoopControl unconditionally marked every inner DO of a
collapsed nest via markDoConstructAsCollapsed. genFIR(DoConstruct) then
read that marking and skipped the inner DO's loop machinery on the
assumption that the parent acc.loop iterates and supplies the IV via a
block argument. That assumption holds for the structured case, but not
for the unstructured case added in #164992. Skipping it left the
PFT-pre-allocated scaffold blocks (pre-header, header, exit) without
terminators.
Fix: add a markInnerCollapsed parameter (default true) to
visitLoopControl, and pass false from privatizeInductionVariables (the
unstructured case of buildACCLoopOp).
Assisted-by: AI
Added:
Modified:
flang/lib/Lower/OpenACC.cpp
flang/test/Lower/OpenACC/acc-unstructured.f90
Removed:
################################################################################
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 161023aa51db8..2e0ae0bdf7a27 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1504,13 +1504,22 @@ static void determineDefaultLoopParMode(
}
// Helper to visit Bounds of DO LOOP nest.
+//
+// When `markInnerCollapsed` is true (the default), inner DOs that are absorbed
+// into the parent acc.loop's collapse semantics are tagged via
+// markDoConstructAsCollapsed so that genFIR(DoConstruct) skips re-emitting
+// their loop machinery (the parent acc.loop iterates and supplies the IV via
+// a block argument). Pass `false` when the parent acc.loop is unstructured:
+// it has no IV block arguments and the inner DO's iteration mechanics must
+// be emitted as ordinary control flow inside the acc.loop body.
static void visitLoopControl(
Fortran::lower::AbstractConverter &converter,
const Fortran::parser::DoConstruct &outerDoConstruct,
uint64_t loopsToProcess, Fortran::lower::pft::Evaluation &eval,
std::function<void(const Fortran::parser::LoopControl::Bounds &,
mlir::Location)>
- callback) {
+ callback,
+ bool markInnerCollapsed = true) {
Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
for (uint64_t i = 0; i < loopsToProcess; ++i) {
const Fortran::parser::LoopControl *loopControl;
@@ -1537,7 +1546,8 @@ static void visitLoopControl(
if (!innerDo)
break; // No deeper loop; stop collecting collapsed bounds.
- Fortran::lower::markDoConstructAsCollapsed(*innerDo);
+ if (markInnerCollapsed)
+ Fortran::lower::markDoConstructAsCollapsed(*innerDo);
mlir::Location loc =
converter.genLocation(Fortran::parser::FindSourceLocation(*innerDo));
if (innerDo->IsDoConcurrent())
@@ -1920,15 +1930,24 @@ static void privatizeInductionVariables(
llvm::SmallVector<mlir::Location> ivLocs;
assert(!outerDoConstruct.IsDoConcurrent() &&
"do concurrent loops are not expected to contained early exits");
- visitLoopControl(converter, outerDoConstruct, loopsToProcess, eval,
- [&](const Fortran::parser::LoopControl::Bounds &bounds,
- mlir::Location loc) {
- locs.push_back(loc);
- Fortran::semantics::Symbol &ivSym =
- bounds.Name().thing.symbol->GetUltimate();
- privatizeIv(converter, ivSym, currentLocation, ivTypes,
- ivLocs, privateOperands, ivPrivate);
- });
+ // Do not mark inner DOs as "collapsed": the parent acc.loop is unstructured
+ // and has no IV block arguments, so each inner DO must emit its own
+ // iteration mechanics as cf inside the acc.loop body. Marking them collapsed
+ // would cause genFIR(DoConstruct) to lower only the body and leave the
+ // pre-allocated PFT scaffold blocks (preheader, header, exit) without
+ // terminators -- which then trips runRegionDCE's mightHaveTerminator
+ // assertion.
+ visitLoopControl(
+ converter, outerDoConstruct, loopsToProcess, eval,
+ [&](const Fortran::parser::LoopControl::Bounds &bounds,
+ mlir::Location loc) {
+ locs.push_back(loc);
+ Fortran::semantics::Symbol &ivSym =
+ bounds.Name().thing.symbol->GetUltimate();
+ privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs,
+ privateOperands, ivPrivate);
+ },
+ /*markInnerCollapsed=*/false);
}
static mlir::acc::LoopOp
diff --git a/flang/test/Lower/OpenACC/acc-unstructured.f90 b/flang/test/Lower/OpenACC/acc-unstructured.f90
index 57b678c1200d9..ce58ae90bdc35 100644
--- a/flang/test/Lower/OpenACC/acc-unstructured.f90
+++ b/flang/test/Lower/OpenACC/acc-unstructured.f90
@@ -222,3 +222,91 @@ subroutine test_unstructured8(a, n)
! CHECK: fir.load %{{.*}} : !fir.ref<i32>
! CHECK: arith.cmpi eq
! CHECK: cf.cond_br
+
+! Test that `acc parallel loop collapse(N)` whose body has an early-exit
+! (here, `if (cond) then ... cycle ... end if`) lowers cleanly. The
+! corresponding acc.loop must privatize all N induction variables, carry
+! both `collapse = [N]` and `unstructured` attributes, and emit the
+! iteration mechanics for all N levels as explicit cf inside the body.
+! Reproducer derived from lorado issue #2856.
+subroutine test_unstructured_collapse_cycle(a)
+ integer :: i, j, jdiag
+ real(8) :: a(:,:)
+ jdiag = 4
+ !$acc parallel loop collapse(2) copy(a)
+ do j = 1, 8
+ do i = 1, 8
+ if (i == jdiag) then
+ a(i, j) = 0.0d0
+ cycle
+ end if
+ a(i, j) = real(i + j, 8)
+ end do
+ end do
+ !$acc end parallel loop
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_unstructured_collapse_cycle
+! CHECK: acc.parallel combined(loop)
+! Both induction variables (j and i) are privatized:
+! CHECK: %[[PRIVJ:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK: %[[PRIVI:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "i"}
+! No control(...) on acc.loop — bounds are not on the op:
+! CHECK: acc.loop combined(parallel) private(%[[PRIVJ]], %[[PRIVI]] : !fir.ref<i32>, !fir.ref<i32>) {
+! Outer loop trip-count test (j) emitted as cf:
+! CHECK: arith.cmpi sgt
+! CHECK: cf.cond_br
+! Inner loop trip-count test (i) emitted as cf:
+! CHECK: arith.cmpi sgt
+! CHECK: cf.cond_br
+! The if/cycle is a structured cf branch in the body:
+! CHECK: arith.cmpi eq
+! CHECK: cf.cond_br
+! CHECK: acc.yield
+! CHECK: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>], unstructured}
+
+! Test that `acc parallel loop collapse(N)` lowers cleanly when the early-exit
+! is a STOP (the form already covered for collapse=1 by test_unstructured2).
+subroutine test_unstructured_collapse_stop(a)
+ integer :: i, j, k
+ real :: a(:,:,:)
+ !$acc parallel loop collapse(3)
+ do i = 1, 10
+ do j = 1, 10
+ do k = 1, 10
+ if (a(1,2,3) > 10) stop 'just to be unstructured'
+ end do
+ end do
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_unstructured_collapse_stop
+! All three IVs privatized:
+! CHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "k"}
+! CHECK: acc.loop combined(parallel) private(%{{.*}}, %{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK: fir.call @_FortranAStopStatementText
+! CHECK: } attributes {collapse = [3], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>], unstructured}
+
+! Test orphaned `acc loop collapse(N)`
+subroutine test_unstructured_collapse_loop_only(a)
+ integer :: i, j, jdiag
+ real(8) :: a(:,:)
+ jdiag = 4
+ !$acc loop collapse(2)
+ do j = 1, 8
+ do i = 1, 8
+ if (i == jdiag) then
+ a(i, j) = 0.0d0
+ cycle
+ end if
+ a(i, j) = real(i + j, 8)
+ end do
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_unstructured_collapse_loop_only
+! Standalone acc.loop (no `combined(...)`):
+! CHECK: acc.loop private(%{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>], unstructured}
More information about the flang-commits
mailing list