[flang-commits] [flang] [OpenACC][flang] Emit NYI when unstructured loops are associated with OpenACC directives (PR #202948)

Fri Jun 12 00:53:27 PDT 2026

https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/202948

>From 03aafcf4b1e28d5c394bfd7aad3a7ce9a99ed262 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at gmail.com>
Date: Wed, 10 Jun 2026 03:54:26 -0700
Subject: [PATCH 1/2] [OpenACC][flang] Emit NYI when unstructured loops are
 associated with OpenACC directives

When an unstructured loop is associated with a loop or a combined
directive, we emit an unstructured CFG for the loop's logic nested
within the OpenACC op. This effectively serializes the nested loop on
the device, which is not desirable. For now, emit NYIs while working on
a longer-term solution.

The NYI is restricted to the cases where the loop will be lowered with
`independent` parallelism semantics for the default device_type -- i.e.,
the user has explicitly promised the loop is parallel. This covers:
- combined `acc parallel loop`,
- standalone `acc loop` inside `acc parallel`,
- orphan `acc loop` inside a non-`seq` acc routine,
- explicit `independent` clause.

For `auto` (`acc kernels loop` and `acc loop` inside `acc kernels`) and
for `seq` (`acc serial loop`, `acc loop` inside `acc serial`, explicit
`seq`, or orphan inside a `seq` routine), the user has not made a
parallelism guarantee, so falling back to unstructured CFG inside the
acc.loop is acceptable and not silently incorrect.

Co-author: Claude Opus 4.7
---
 flang/lib/Lower/OpenACC.cpp                   | 139 +++++++++--
 .../acc-unstructured-combined-construct.f90   |  69 ++++++
 .../Todo/acc-unstructured-loop-construct.f90  | 120 +++++++++
 flang/test/Lower/OpenACC/acc-cache.f90        | 119 +--------
 flang/test/Lower/OpenACC/acc-loop-exit.f90    |  41 ---
 flang/test/Lower/OpenACC/acc-unstructured.f90 | 233 +++++++-----------
 6 files changed, 400 insertions(+), 321 deletions(-)
 create mode 100644 flang/test/Lower/OpenACC/Todo/acc-unstructured-combined-construct.f90
 create mode 100644 flang/test/Lower/OpenACC/Todo/acc-unstructured-loop-construct.f90
 delete mode 100644 flang/test/Lower/OpenACC/acc-loop-exit.f90

diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 1c51cf7fa6ca5..51c9356cc8d46 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1308,6 +1308,41 @@ static void gatherDeviceTypeAttrs(
         builder.getContext(), getDeviceType(deviceTypeExpr.v)));
 }
 
+// Tries to handle a clause that affects the loop's parallelism-mode tracking:
+//   - `seq` / `auto` / `independent`: append the current device_type set to
+//     the corresponding list.
+//   - `device_type`: replace the current device_type set.
+// Returns true if the clause was handled.
+static bool tryHandleLoopParModeClause(
+    fir::FirOpBuilder &builder, const Fortran::parser::AccClause &clause,
+    llvm::SmallVector<mlir::Attribute> &crtDeviceTypes,
+    llvm::SmallVector<mlir::Attribute> &seqDeviceTypes,
+    llvm::SmallVector<mlir::Attribute> &independentDeviceTypes,
+    llvm::SmallVector<mlir::Attribute> &autoDeviceTypes) {
+  // A `device_type` clause replaces the set that later clauses apply to.
+  if (const auto *dtClause =
+          std::get_if<Fortran::parser::AccClause::DeviceType>(&clause.u)) {
+    crtDeviceTypes.clear();
+    gatherDeviceTypeAttrs(builder, dtClause, crtDeviceTypes);
+    return true;
+  }
+
+  // Otherwise select the list that matches the parallelism-mode clause.
+  llvm::SmallVector<mlir::Attribute> *modeList = nullptr;
+  if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u))
+    modeList = &seqDeviceTypes;
+  else if (std::get_if<Fortran::parser::AccClause::Auto>(&clause.u))
+    modeList = &autoDeviceTypes;
+  else if (std::get_if<Fortran::parser::AccClause::Independent>(&clause.u))
+    modeList = &independentDeviceTypes;
+  if (!modeList)
+    return false; // Not a parallelism-mode clause.
+
+  // The mode applies to every device_type currently in effect.
+  modeList->append(crtDeviceTypes.begin(), crtDeviceTypes.end());
+  return true;
+}
+
 static void genIfClause(Fortran::lower::AbstractConverter &converter,
                         mlir::Location clauseLocation,
                         const Fortran::parser::AccClause::If *ifClause,
@@ -1518,6 +1553,79 @@ static void determineDefaultLoopParMode(
   }
 }
 
+// Returns true when the acc loop being constructed will have `independent`
+// parallelism semantics for the default device_type (i.e., DeviceType::None).
+//
+// `directive` is the OpenACC directive that the loop is associated with.
+//
+// The NYI for unstructured loops nested in an acc loop / combined construct is
+// only meaningful when the user has promised parallelism. For `auto` and `seq`
+// the user has not made that promise, so falling back to unstructured CFG
+// inside the acc.loop is acceptable.
+static bool
+loopWillBeIndependent(Fortran::lower::AbstractConverter &converter,
+                      const Fortran::parser::AccClauseList &accClauseList,
+                      llvm::acc::Directive directive) {
+  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+
+  // Walk the clauses and collect seq/auto/independent attributes per
+  // device_type. Other clauses are ignored.
+  llvm::SmallVector<mlir::Attribute> seqDeviceTypes, independentDeviceTypes,
+      autoDeviceTypes;
+  llvm::SmallVector<mlir::Attribute> crtDeviceTypes;
+  crtDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
+      builder.getContext(), mlir::acc::DeviceType::None));
+
+  for (const Fortran::parser::AccClause &clause : accClauseList.v)
+    tryHandleLoopParModeClause(builder, clause, crtDeviceTypes, seqDeviceTypes,
+                               independentDeviceTypes, autoDeviceTypes);
+
+  auto hasDeviceNone = [](mlir::Attribute attr) -> bool {
+    // cast<>: a failed dyn_cast<> would yield a null attr to dereference.
+    return mlir::cast<mlir::acc::DeviceTypeAttr>(attr).getValue() ==
+           mlir::acc::DeviceType::None;
+  };
+
+  if (llvm::any_of(independentDeviceTypes, hasDeviceNone))
+    return true;
+  if (llvm::any_of(seqDeviceTypes, hasDeviceNone) ||
+      llvm::any_of(autoDeviceTypes, hasDeviceNone))
+    return false;
+
+  // No explicit mode for the default device_type: combined constructs imply
+  // one for the loop; a standalone `acc loop` defers to its enclosing compute
+  // op (or the routine attribute for orphaned loops).
+  switch (directive) {
+  case llvm::acc::ACCD_parallel_loop:
+    return true;
+  case llvm::acc::ACCD_kernels_loop:
+  case llvm::acc::ACCD_serial_loop:
+    return false;
+  case llvm::acc::ACCD_loop: {
+    // The OpenACC spec treats any orphan loop as `independent` by default,
+    // but a parallelism promise only really exists when the enclosing
+    // function is declared `acc routine` or there is an `acc parallel` parent.
+    // Orphan loops in plain (non-acc-routine) functions won't run as device
+    // code, so serializing their unstructured CFG should be harmless.
+    mlir::Region *currentRegion = builder.getBlock()->getParent();
+    mlir::Operation *parentOp =
+        mlir::acc::getEnclosingComputeOp(*currentRegion);
+
+    if (mlir::isa_and_present<mlir::acc::ParallelOp>(parentOp))
+      return true;
+
+    if (parentOp) // KernelsOp / SerialOp
+      return false;
+
+    // Orphan loop in an `acc routine`.
+    return mlir::acc::isAccRoutine(builder.getFunction().getOperation()) &&
+           !isInsideSeqOpenACCRoutine(builder);
+  }
+  default:
+    llvm_unreachable("unexpected directive for loopWillBeIndependent");
+  }
+}
+
 // Helper to visit Bounds of DO LOOP nest.
 //
 // When `markInnerCollapsed` is true (the default), inner DOs that are absorbed
@@ -2240,21 +2348,11 @@ static mlir::acc::LoopOp createLoopOp(
                     reductionOperands, /*async=*/{},
                     /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{},
                     &dataMap);
-    } else if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
-      for (auto crtDeviceTypeAttr : crtDeviceTypes)
-        seqDeviceTypes.push_back(crtDeviceTypeAttr);
-    } else if (std::get_if<Fortran::parser::AccClause::Independent>(
-                   &clause.u)) {
-      for (auto crtDeviceTypeAttr : crtDeviceTypes)
-        independentDeviceTypes.push_back(crtDeviceTypeAttr);
-    } else if (std::get_if<Fortran::parser::AccClause::Auto>(&clause.u)) {
-      for (auto crtDeviceTypeAttr : crtDeviceTypes)
-        autoDeviceTypes.push_back(crtDeviceTypeAttr);
-    } else if (const auto *deviceTypeClause =
-                   std::get_if<Fortran::parser::AccClause::DeviceType>(
-                       &clause.u)) {
-      crtDeviceTypes.clear();
-      gatherDeviceTypeAttrs(builder, deviceTypeClause, crtDeviceTypes);
+    } else if (tryHandleLoopParModeClause(
+                   builder, clause, crtDeviceTypes, seqDeviceTypes,
+                   independentDeviceTypes, autoDeviceTypes)) {
+      // Updates to the relevant variables are already handled in
+      // tryHandleLoopParModeClause.
     } else if (const auto *collapseClause =
                    std::get_if<Fortran::parser::AccClause::Collapse>(
                        &clause.u)) {
@@ -2373,12 +2471,17 @@ genACC(Fortran::lower::AbstractConverter &converter,
 
   assert(loopDirective.v == llvm::acc::ACCD_loop &&
          "Unsupported OpenACC loop construct");
-  (void)loopDirective;
 
   const auto &accClauseList =
       std::get<Fortran::parser::AccClauseList>(beginLoopDirective.t);
   const auto &outerDoConstruct =
       std::get<std::optional<Fortran::parser::DoConstruct>>(loopConstruct.t);
+
+  if (outerDoConstruct.has_value() && eval.lowerAsUnstructured() &&
+      loopWillBeIndependent(converter, accClauseList, loopDirective.v))
+    TODO(currentLocation,
+         "unstructured do loop in independent OpenACC loop construct");
+
   auto loopOp = createLoopOp(converter, currentLocation, semanticsContext,
                              stmtCtx, *outerDoConstruct, eval, accClauseList,
                              /*combinedConstructs=*/{});
@@ -3148,6 +3251,10 @@ genACC(Fortran::lower::AbstractConverter &converter,
       converter.genLocation(beginCombinedDirective.source);
   Fortran::lower::StatementContext stmtCtx;
 
+  if (outerDoConstruct.has_value() && eval.lowerAsUnstructured() &&
+      loopWillBeIndependent(converter, accClauseList, combinedDirective.v))
+    TODO(currentLocation, "unstructured do loop in combined acc construct");
+
   if (combinedDirective.v == llvm::acc::ACCD_kernels_loop) {
     createComputeOp<mlir::acc::KernelsOp>(
         converter, currentLocation, eval, semanticsContext, stmtCtx,
diff --git a/flang/test/Lower/OpenACC/Todo/acc-unstructured-combined-construct.f90 b/flang/test/Lower/OpenACC/Todo/acc-unstructured-combined-construct.f90
new file mode 100644
index 0000000000000..da06ce8c2d170
--- /dev/null
+++ b/flang/test/Lower/OpenACC/Todo/acc-unstructured-combined-construct.f90
@@ -0,0 +1,69 @@
+! Each sub-file exercises a different unstructured-CFG pattern inside a
+! combined `acc parallel loop` construct (default parallelism is
+! `independent`).
+
+! RUN: split-file %s %t
+! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/stop_collapse1.f90 -o - 2>&1 | FileCheck %s --check-prefix=STOP1
+! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/cycle_collapse2.f90 -o - 2>&1 | FileCheck %s --check-prefix=CYCLE2
+! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/stop_collapse3.f90 -o - 2>&1 | FileCheck %s --check-prefix=STOP3
+
+!--- stop_collapse1.f90
+
+! `acc parallel loop` with STOP in the body. Loop defaults to `independent`.
+subroutine test_unstructured2(a, b, c)
+  integer :: i, j, k
+  real :: a(:,:,:), b(:,:,:), c(:,:,:)
+
+  !$acc parallel loop
+  do i = 1, 10
+    do j = 1, 10
+      do k = 1, 10
+        if (a(1,2,3) > 10) stop 'just to be unstructured'
+      end do
+    end do
+  end do
+
+end subroutine
+
+! STOP1: not yet implemented: unstructured do loop in combined acc construct
+
+!--- cycle_collapse2.f90
+
+! `acc parallel loop collapse(2)` with an early-exit (CYCLE).
+subroutine test_unstructured_collapse_cycle(a)
+  integer :: i, j, jdiag
+  real(8) :: a(:,:)
+  jdiag = 4
+  !$acc parallel loop collapse(2) copy(a)
+  do j = 1, 8
+    do i = 1, 8
+      if (i == jdiag) then
+        a(i, j) = 0.0d0
+        cycle
+      end if
+      a(i, j) = real(i + j, 8)
+    end do
+  end do
+  !$acc end parallel loop
+end subroutine
+
+! CYCLE2: not yet implemented: unstructured do loop in combined acc construct
+
+!--- stop_collapse3.f90
+
+! `acc parallel loop collapse(3)` with STOP - the collapse=3 form of the
+! STOP scenario above.
+subroutine test_unstructured_collapse_stop(a)
+  integer :: i, j, k
+  real :: a(:,:,:)
+  !$acc parallel loop collapse(3)
+  do i = 1, 10
+    do j = 1, 10
+      do k = 1, 10
+        if (a(1,2,3) > 10) stop 'just to be unstructured'
+      end do
+    end do
+  end do
+end subroutine
+
+! STOP3: not yet implemented: unstructured do loop in combined acc construct
diff --git a/flang/test/Lower/OpenACC/Todo/acc-unstructured-loop-construct.f90 b/flang/test/Lower/OpenACC/Todo/acc-unstructured-loop-construct.f90
new file mode 100644
index 0000000000000..285d970250c09
--- /dev/null
+++ b/flang/test/Lower/OpenACC/Todo/acc-unstructured-loop-construct.f90
@@ -0,0 +1,120 @@
+! Each sub-file exercises a different unstructured-CFG pattern inside an
+! `acc loop` whose default parallelism resolves to `independent`.
+
+! RUN: split-file %s %t
+! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/goto_one_level.f90 -o - 2>&1 | FileCheck %s --check-prefix=GOTO1
+! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/goto_with_intermediate.f90 -o - 2>&1 | FileCheck %s --check-prefix=GOTO2
+! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/collapse_cycle.f90 -o - 2>&1 | FileCheck %s --check-prefix=CCYCLE
+! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/cache_exit.f90 -o - 2>&1 | FileCheck %s --check-prefix=CEXIT
+! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/cache_select_case.f90 -o - 2>&1 | FileCheck %s --check-prefix=CCASE
+
+!--- goto_one_level.f90
+
+! GOTO exits the inner `acc loop seq` (one level), landing in the body of
+! the outer `acc loop gang vector`. Outer loop defaults to `independent`.
+subroutine test_unstructured6(N, A, B)
+  implicit real*8 (a-h, o-z)
+  !$acc routine gang
+  dimension A(*), B(*)
+  !$acc loop gang vector
+  do 100 i = 1, N
+  !$acc loop seq
+    do 10 j = 1, 1000
+      if (A(i) .gt. B(i)) goto 20
+10  continue
+20  B(i) = A(i)
+100 continue
+end subroutine
+
+! GOTO1: not yet implemented: unstructured do loop in independent OpenACC loop construct
+
+!--- goto_with_intermediate.f90
+
+! Same as above but with intermediate code between the inner loop end and
+! the GOTO target, exercising the jump-table dispatch path.
+subroutine test_unstructured7(A, B, C, N)
+  implicit real*8 (a-h, o-z)
+  !$acc routine gang
+  dimension A(*), B(*), C(*)
+  !$acc loop gang vector
+  do 100 i = 1, N
+  !$acc loop seq
+    do 10 j = 1, 1000
+      if (A(i) .gt. B(i)) goto 20
+10  continue
+    C(i) = 999.0
+20  B(i) = A(i)
+100 continue
+end subroutine
+
+! GOTO2: not yet implemented: unstructured do loop in independent OpenACC loop construct
+
+!--- collapse_cycle.f90
+
+! Orphan `acc loop collapse(2)` with an early-exit (CYCLE) - defaults to
+! `independent` inside the (non-seq) acc routine.
+subroutine test_unstructured_collapse_loop_only(a)
+  !$acc routine gang
+  integer :: i, j, jdiag
+  real(8) :: a(:,:)
+  jdiag = 4
+  !$acc loop collapse(2)
+  do j = 1, 8
+    do i = 1, 8
+      if (i == jdiag) then
+        a(i, j) = 0.0d0
+        cycle
+      end if
+      a(i, j) = real(i + j, 8)
+    end do
+  end do
+end subroutine
+
+! CCYCLE: not yet implemented: unstructured do loop in independent OpenACC loop construct
+
+!--- cache_exit.f90
+
+! `acc loop` with `cache` directive and EXIT inside the body - the EXIT
+! makes the loop unstructured. Orphan loop inside a (non-seq) acc routine
+! defaults to `independent`.
+subroutine test_cache_single_element()
+  !$acc routine gang
+  integer, parameter :: n = 10
+  real, dimension(n) :: a, b
+  integer :: i
+
+  !$acc loop
+  do i = 1, n
+    !$acc cache(b(i))
+    a(i) = b(i)
+    if (a(i) > 100.0) exit
+  end do
+end subroutine
+
+! CEXIT: not yet implemented: unstructured do loop in independent OpenACC loop construct
+
+!--- cache_select_case.f90
+
+! `acc loop` with `cache` directive and SELECT CASE inside the body - the
+! SELECT CASE makes the loop's body have unstructured CFG. Orphan loop
+! inside a (non-seq) acc routine defaults to `independent`.
+subroutine test_cache_nonunit_lb()
+  !$acc routine gang
+  integer :: arr(10:20)
+  integer :: i
+
+  !$acc loop
+  do i = 10, 20
+    !$acc cache(arr(15))
+    select case (mod(i, 3))
+    case (0)
+      arr(i) = i * 2
+    case (1)
+      arr(i) = i * 3
+    case default
+      arr(i) = i
+    end select
+  end do
+end subroutine
+
+! CCASE: not yet implemented: unstructured do loop in independent OpenACC loop construct
diff --git a/flang/test/Lower/OpenACC/acc-cache.f90 b/flang/test/Lower/OpenACC/acc-cache.f90
index eb32f3b704198..62af4d311591d 100644
--- a/flang/test/Lower/OpenACC/acc-cache.f90
+++ b/flang/test/Lower/OpenACC/acc-cache.f90
@@ -265,58 +265,9 @@ subroutine test_cache_2d_loop_vars()
 ! CHECK-NEXT: }
 end subroutine
 
-! CHECK-LABEL: func.func @_QPtest_cache_single_element()
-! Test cache with single element access: b(i)
-! This test includes an EXIT statement to verify cache scope with early loop exit
-subroutine test_cache_single_element()
-  integer, parameter :: n = 10
-  real, dimension(n) :: a, b
-  integer :: i
-
-  !$acc loop
-  do i = 1, n
-    !$acc cache(b(i))
-    a(i) = b(i)
-    if (a(i) > 100.0) exit
-  end do
-
-! Unstructured loop with EXIT: acc.loop becomes unstructured with cf.br/cf.cond_br
-! CHECK: acc.loop private({{.*}}) {
-! The privatized iterator is declared
-! CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_cache_single_elementEi"}
-! Loop control is done with cf.br/cf.cond_br in unstructured form
-! CHECK: cf.br ^[[HEADER:.*]]
-! CHECK: ^[[HEADER]]:
-! CHECK: cf.cond_br %{{.*}}, ^[[BODY:.*]], ^[[EXIT:.*]]
-! CHECK: ^[[BODY]]:
-! CHECK: %[[C1:.*]] = arith.constant 1 : index
-! Load iterator i for bounds computation
-! CHECK: %[[I_LOAD:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
-! CHECK: %[[I_CVT1:.*]] = fir.convert %[[I_LOAD]] : (i32) -> i64
-! CHECK: %[[I_IDX:.*]] = fir.convert %[[I_CVT1]] : (i64) -> index
-! Compute lowerbound = i - 1 (single element: upperbound = lowerbound)
-! CHECK: %[[LB:.*]] = arith.subi %[[I_IDX]], %[[C1]] : index
-! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[LB]] : index) extent(%[[C1]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
-! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
-! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_single_elementEb"}
-! Loop body uses the cached single element
-! CHECK: hlfir.designate %[[DECL]]#0
-! CHECK: fir.load
-! CHECK: hlfir.assign
-! Unstructured control flow: EXIT generates conditional branch
-! CHECK: %[[CMP:.*]] = arith.cmpf ogt, %{{.*}}, %{{.*}} : f32
-! CHECK: cf.cond_br %[[CMP]], ^[[EXIT_BB:.*]], ^[[CONT_BB:.*]]
-! CHECK: ^[[EXIT_BB]]:
-! Early exit path: branch to acc.yield
-! CHECK: cf.br ^[[YIELD:.*]]
-! CHECK: ^[[CONT_BB]]:
-! Normal path: update iterator and loop back
-! CHECK: cf.br ^[[HEADER]]
-! CHECK: ^[[YIELD]]:
-! Scope termination: acc.yield marks end of cache scope
-! CHECK: acc.yield
-! CHECK-NEXT: } attributes {{{.*}}unstructured}
-end subroutine
+! Tests with independent loops + unstructured CFG (cache + EXIT/SELECT CASE)
+! live as TODO-style tests under
+! flang/test/Lower/OpenACC/Todo/acc-unstructured-loop-construct.f90.
 
 ! CHECK-LABEL: func.func @_QPtest_cache_mixed_bounds()
 ! Test cache with mixed constant and variable bounds: b(1:i)
@@ -365,70 +316,6 @@ subroutine test_cache_mixed_bounds()
 ! CHECK-NEXT: }
 end subroutine
 
-! CHECK-LABEL: func.func @_QPtest_cache_nonunit_lb()
-! Test cache with array that has non-1 lower bound: arr(10:20), cache(arr(15))
-! This test includes SELECT CASE for multi-way unstructured control flow
-subroutine test_cache_nonunit_lb()
-  integer :: arr(10:20)
-  integer :: i
-
-  !$acc loop
-  do i = 10, 20
-    !$acc cache(arr(15))
-    select case (mod(i, 3))
-    case (0)
-      arr(i) = i * 2
-    case (1)
-      arr(i) = i * 3
-    case default
-      arr(i) = i
-    end select
-  end do
-
-! For arr(10:20), startIdx = 10, element 15 has lowerbound = 15 - 10 = 5
-! CHECK: %[[C10:.*]] = arith.constant 10 : index
-! Unstructured loop with SELECT CASE: acc.loop becomes unstructured
-! CHECK: acc.loop private({{.*}}) {
-! CHECK: cf.br ^[[HEADER:.*]]
-! CHECK: ^[[HEADER]]:
-! CHECK: cf.cond_br %{{.*}}, ^[[BODY:.*]], ^[[EXIT:.*]]
-! CHECK: ^[[BODY]]:
-! Compute lowerbound = 15 - startIdx = 15 - 10 = 5
-! CHECK: %[[C1:.*]] = arith.constant 1 : index
-! CHECK: %[[C15:.*]] = arith.constant 15 : index
-! CHECK: %[[LB:.*]] = arith.subi %[[C15]], %{{.*}} : index
-! Single element: upperbound equals lowerbound, startIdx = 10
-! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[LB]] : index) extent(%[[C1]] : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index) {strideInBytes = true}
-! For non-unit lower bound arrays, acc.cache uses the box type from hlfir.declare
-! CHECK: %[[CACHE:.*]] = acc.cache var(%{{.*}} : !fir.box<!fir.array<11xi32>>) bounds(%[[BOUND]]) -> !fir.box<!fir.array<11xi32>> {{{.*}}name = "arr
-! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_nonunit_lbEarr"}
-! Unstructured control flow: SELECT CASE generates fir.select_case
-! CHECK: %[[MOD:.*]] = arith.remsi %{{.*}}, %{{.*}} : i32
-! CHECK: fir.select_case %[[MOD]] : i32 [#fir.point, %{{.*}}, ^[[CASE0:.*]], #fir.point, %{{.*}}, ^[[CASE1:.*]], unit, ^[[DEFAULT:.*]]]
-! Case 0: i * 2
-! CHECK: ^[[CASE0]]:
-! CHECK: hlfir.designate %[[DECL]]#0
-! CHECK: hlfir.assign
-! CHECK: cf.br ^[[MERGE:.*]]
-! Case 1: i * 3
-! CHECK: ^[[CASE1]]:
-! CHECK: hlfir.designate %[[DECL]]#0
-! CHECK: hlfir.assign
-! CHECK: cf.br ^[[MERGE]]
-! Default case: i
-! CHECK: ^[[DEFAULT]]:
-! CHECK: hlfir.designate %[[DECL]]#0
-! CHECK: hlfir.assign
-! CHECK: cf.br ^[[MERGE]]
-! All SELECT CASE branches converge, then loop back or exit
-! CHECK: ^[[MERGE]]:
-! CHECK: cf.br ^[[HEADER]]
-! CHECK: ^[[EXIT]]:
-! Scope termination: acc.yield marks end of cache scope
-! CHECK: acc.yield
-! CHECK-NEXT: } attributes {{{.*}}unstructured}
-end subroutine
-
 ! CHECK-LABEL: func.func @_QPtest_cache_use_after_region()
 ! CHECK: %[[B_VAR:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_cache_use_after_regionEb"}
 subroutine test_cache_use_after_region()
diff --git a/flang/test/Lower/OpenACC/acc-loop-exit.f90 b/flang/test/Lower/OpenACC/acc-loop-exit.f90
deleted file mode 100644
index 6ab215fdbd842..0000000000000
--- a/flang/test/Lower/OpenACC/acc-loop-exit.f90
+++ /dev/null
@@ -1,41 +0,0 @@
-! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
-
-subroutine sub1(x, a)
-  real :: x(200)
-  integer :: a
-
-  !$acc loop
-  do i = 100, 200
-    x(i) = 1.0
-    if (i == a) return
-  end do
-
-  i = 2
-end
-
-! CHECK-LABEL: func.func @_QPsub1
-! CHECK: %[[A:.*]]:2 = hlfir.declare %arg1 dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {uniq_name = "_QFsub1Ea"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[EXIT_COND:.*]] = acc.loop
-! CHECK:   %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: ^bb{{.*}}:
-! CHECK: ^bb{{.*}}:
-! CHECK:   %[[LOAD_I:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
-! CHECK:   %[[LOAD_I:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
-! CHECK:   %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
-! CHECK:   %[[CMP:.*]] = arith.cmpi eq, %[[LOAD_I]], %[[LOAD_A]] : i32
-! CHECK:   cf.cond_br %[[CMP]], ^[[EARLY_RET:.*]], ^[[NO_RET:.*]]
-! CHECK: ^[[EARLY_RET]]:
-! CHECK:   acc.yield %true : i1
-! CHECK: ^[[NO_RET]]:
-! CHECK:   cf.br ^bb{{.*}}
-! CHECK: ^bb{{.*}}:
-! CHECK:   acc.yield %false : i1
-! CHECK: }(i1)
-! CHECK: cf.cond_br %[[EXIT_COND]], ^[[EXIT_BLOCK:.*]], ^[[CONTINUE_BLOCK:.*]]
-! CHECK: ^[[CONTINUE_BLOCK]]:
-! CHECK:   hlfir.assign
-! CHECK:   cf.br ^[[EXIT_BLOCK]]
-! CHECK: ^[[EXIT_BLOCK]]:
-! CHECK:   return
-! CHECK: }
diff --git a/flang/test/Lower/OpenACC/acc-unstructured.f90 b/flang/test/Lower/OpenACC/acc-unstructured.f90
index ce58ae90bdc35..7fc7032f4442f 100644
--- a/flang/test/Lower/OpenACC/acc-unstructured.f90
+++ b/flang/test/Lower/OpenACC/acc-unstructured.f90
@@ -39,29 +39,11 @@ subroutine test_unstructured1(a, b, c)
 ! CHECK: fir.call @_FortranAStopStatementText
 
 
-subroutine test_unstructured2(a, b, c)
-  integer :: i, j, k
-  real :: a(:,:,:), b(:,:,:), c(:,:,:)
-
-  !$acc parallel loop
-  do i = 1, 10
-    do j = 1, 10
-      do k = 1, 10
-        if (a(1,2,3) > 10) stop 'just to be unstructured'
-      end do
-    end do
-  end do
-
-! CHECK-LABEL: func.func @_QPtest_unstructured2
-! CHECK: acc.parallel
-! CHECK: acc.loop combined(parallel) private(%{{.*}} : !fir.ref<i32>) {
-! CHECK: fir.call @_FortranAStopStatementText
-! CHECK: acc.yield
-! CHECK: acc.yield
-! CHECK: } attributes {independent = [#acc.device_type<none>], unstructured}
-! CHECK: acc.yield
-
-end subroutine
+! Tests with independent loops + unstructured CFG (which currently hit the
+! NYI emitted in genACC for `acc loop` / combined constructs) live as
+! TODO-style tests under flang/test/Lower/OpenACC/Todo/:
+!   - Todo/acc-unstructured-combined-construct.f90  (`acc parallel loop` ...)
+!   - Todo/acc-unstructured-loop-construct.f90      (standalone `acc loop` ...)
 
 subroutine test_unstructured3(a, b, c)
   integer :: i, j, k
@@ -138,61 +120,6 @@ subroutine test_unstructured5(a, n)
 ! CHECK: arith.cmpi eq
 ! CHECK: cf.cond_br
 
-! Test that GOTO exiting acc.loop (one level) generates acc.yield
-! instead of an invalid cross-region branch.
-subroutine test_unstructured6(N, A, B)
-  implicit real*8 (a-h, o-z)
-  !$acc routine gang
-  dimension A(*), B(*)
-  !$acc loop gang vector
-  do 100 i = 1, N
-  !$acc loop seq
-    do 10 j = 1, 1000
-      if (A(i) .gt. B(i)) goto 20
-10  continue
-20  B(i) = A(i)
-100 continue
-end subroutine
-
-! CHECK-LABEL: func.func @_QPtest_unstructured6
-! CHECK: acc.loop gang vector
-! CHECK: acc.loop
-! CHECK: arith.cmpf ogt
-! CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref<i32>
-! CHECK: acc.yield
-! CHECK: } attributes {seq = [#acc.device_type<none>], unstructured}
-
-! Test GOTO exiting acc.loop with intermediate code between loop end and
-! target. A jump table (exit selector + dispatch) skips the intermediate code.
-subroutine test_unstructured7(A, B, C, N)
-  implicit real*8 (a-h, o-z)
-  !$acc routine gang
-  dimension A(*), B(*), C(*)
-  !$acc loop gang vector
-  do 100 i = 1, N
-  !$acc loop seq
-    do 10 j = 1, 1000
-      if (A(i) .gt. B(i)) goto 20
-10  continue
-    C(i) = 999.0
-20  B(i) = A(i)
-100 continue
-end subroutine
-
-! CHECK-LABEL: func.func @_QPtest_unstructured7
-! CHECK: acc.loop gang vector
-! Inner loop stores exit selector and yields:
-! CHECK: acc.loop
-! CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref<i32>
-! CHECK: acc.yield
-! CHECK: } attributes {seq = [#acc.device_type<none>], unstructured}
-! Jump table after inner loop:
-! CHECK: fir.load %{{.*}} : !fir.ref<i32>
-! CHECK: arith.cmpi eq
-! CHECK: cf.cond_br
-! Intermediate code on fall-through path:
-! CHECK: arith.constant 9.990000e+02
-
 ! Test GOTO exiting acc.data with intermediate code. Jump table dispatches
 ! after the acc.data op.
 subroutine test_unstructured8(a, n)
@@ -223,90 +150,100 @@ subroutine test_unstructured8(a, n)
 ! CHECK: arith.cmpi eq
 ! CHECK: cf.cond_br
 
-! Test that `acc parallel loop collapse(N)` whose body has an early-exit
-! (here, `if (cond) then ... cycle ... end if`) lowers cleanly. The
-! corresponding acc.loop must privatize all N induction variables, carry
-! both `collapse = [N]` and `unstructured` attributes, and emit the
-! iteration mechanics for all N levels as explicit cf inside the body.
-! Reproducer derived from lorado issue #2856.
-subroutine test_unstructured_collapse_cycle(a)
-  integer :: i, j, jdiag
-  real(8) :: a(:,:)
-  jdiag = 4
-  !$acc parallel loop collapse(2) copy(a)
-  do j = 1, 8
-    do i = 1, 8
-      if (i == jdiag) then
-        a(i, j) = 0.0d0
-        cycle
-      end if
-      a(i, j) = real(i + j, 8)
+! The NYI for unstructured loops associated with an OpenACC loop/combined
+! directive only applies when the loop will be lowered as `independent`. The
+! tests below exercise the relaxed cases where the user has not promised
+! parallelism (seq/auto), so the lowering is expected to emit an `acc.loop`
+! with explicit unstructured CFG inside.
+
+! Combined `acc serial loop` (loop is `seq` by default) with STOP in body.
+subroutine test_unstructured_serial_loop_stop(a)
+  integer :: i, j
+  real :: a(:,:,:)
+  !$acc serial loop
+  do i = 1, 10
+    do j = 1, 10
+      if (a(1,2,3) > 10.0) stop 'unstructured'
+    end do
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_unstructured_serial_loop_stop
+! CHECK: acc.serial combined(loop)
+! CHECK: acc.loop combined(serial)
+! CHECK: fir.call @_FortranAStopStatementText
+! CHECK: } attributes {{{.*}}seq = [#acc.device_type<none>], unstructured}
+
+! Standalone `acc loop seq` with STOP in body (explicit `seq` clause).
+subroutine test_unstructured_loop_seq_stop(a)
+  integer :: i, j
+  real :: a(:,:,:)
+  !$acc loop seq
+  do i = 1, 10
+    do j = 1, 10
+      if (a(1,2,3) > 10.0) stop 'unstructured'
     end do
   end do
-  !$acc end parallel loop
 end subroutine
 
-! CHECK-LABEL: func.func @_QPtest_unstructured_collapse_cycle
-! CHECK: acc.parallel combined(loop)
-! Both induction variables (j and i) are privatized:
-! CHECK: %[[PRIVJ:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "j"}
-! CHECK: %[[PRIVI:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "i"}
-! No control(...) on acc.loop — bounds are not on the op:
-! CHECK: acc.loop combined(parallel) private(%[[PRIVJ]], %[[PRIVI]] : !fir.ref<i32>, !fir.ref<i32>) {
-! Outer loop trip-count test (j) emitted as cf:
-! CHECK: arith.cmpi sgt
-! CHECK: cf.cond_br
-! Inner loop trip-count test (i) emitted as cf:
-! CHECK: arith.cmpi sgt
-! CHECK: cf.cond_br
-! The if/cycle is a structured cf branch in the body:
-! CHECK: arith.cmpi eq
-! CHECK: cf.cond_br
-! CHECK: acc.yield
-! CHECK: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>], unstructured}
+! CHECK-LABEL: func.func @_QPtest_unstructured_loop_seq_stop
+! CHECK: acc.loop private({{.*}})
+! CHECK: fir.call @_FortranAStopStatementText
+! CHECK: } attributes {{{.*}}seq = [#acc.device_type<none>], unstructured}
 
-! Test that `acc parallel loop collapse(N)` lowers cleanly when the early-exit
-! is a STOP (the form already covered for collapse=1 by test_unstructured2).
-subroutine test_unstructured_collapse_stop(a)
-  integer :: i, j, k
+! Standalone `acc loop auto` with STOP in body (explicit `auto` clause).
+subroutine test_unstructured_loop_auto_stop(a)
+  integer :: i, j
   real :: a(:,:,:)
-  !$acc parallel loop collapse(3)
+  !$acc loop auto
   do i = 1, 10
     do j = 1, 10
-      do k = 1, 10
-        if (a(1,2,3) > 10) stop 'just to be unstructured'
-      end do
+      if (a(1,2,3) > 10.0) stop 'unstructured'
+    end do
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_unstructured_loop_auto_stop
+! CHECK: acc.loop private({{.*}})
+! CHECK: fir.call @_FortranAStopStatementText
+! CHECK: } attributes {auto_ = [#acc.device_type<none>], {{.*}}unstructured}
+
+! Standalone `acc loop` inside `acc serial` with STOP in body (loop is `seq`
+! by default because parent compute construct is serial).
+subroutine test_unstructured_loop_in_serial_stop(a)
+  integer :: i, j
+  real :: a(:,:,:)
+  !$acc serial
+  !$acc loop
+  do i = 1, 10
+    do j = 1, 10
+      if (a(1,2,3) > 10.0) stop 'unstructured'
     end do
   end do
+  !$acc end serial
 end subroutine
 
-! CHECK-LABEL: func.func @_QPtest_unstructured_collapse_stop
-! All three IVs privatized:
-! CHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "j"}
-! CHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "k"}
-! CHECK: acc.loop combined(parallel) private(%{{.*}}, %{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK-LABEL: func.func @_QPtest_unstructured_loop_in_serial_stop
+! CHECK: acc.serial
+! CHECK: acc.loop private({{.*}})
 ! CHECK: fir.call @_FortranAStopStatementText
-! CHECK: } attributes {collapse = [3], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>], unstructured}
-
-! Test orphaned `acc loop collapse(N)`
-subroutine test_unstructured_collapse_loop_only(a)
-  integer :: i, j, jdiag
-  real(8) :: a(:,:)
-  jdiag = 4
-  !$acc loop collapse(2)
-  do j = 1, 8
-    do i = 1, 8
-      if (i == jdiag) then
-        a(i, j) = 0.0d0
-        cycle
-      end if
-      a(i, j) = real(i + j, 8)
+! CHECK: } attributes {{{.*}}seq = [#acc.device_type<none>], unstructured}
+
+! Orphan `acc loop` inside a `seq` acc routine: loop is `seq` by default.
+subroutine test_unstructured_orphan_loop_in_seq_routine(a)
+  integer :: i, j
+  real :: a(:,:,:)
+  !$acc routine seq
+  !$acc loop
+  do i = 1, 10
+    do j = 1, 10
+      if (a(1,2,3) > 10.0) stop 'unstructured'
     end do
   end do
 end subroutine
 
-! CHECK-LABEL: func.func @_QPtest_unstructured_collapse_loop_only
-! Standalone acc.loop (no `combined(...)`):
-! CHECK: acc.loop private(%{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>) {
-! CHECK: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>], unstructured}
+! CHECK-LABEL: func.func @_QPtest_unstructured_orphan_loop_in_seq_routine
+! CHECK: acc.loop private({{.*}})
+! CHECK: fir.call @_FortranAStopStatementText
+! CHECK: } attributes {{{.*}}seq = [#acc.device_type<none>], unstructured}
+

>From 13748fff4bb704f7d1005fb2d9f277258403ae8c Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at gmail.com>
Date: Thu, 11 Jun 2026 22:32:29 -0700
Subject: [PATCH 2/2] Re-introduce removed test

---
 flang/test/Lower/OpenACC/acc-cache.f90        | 119 +++++++++++-
 flang/test/Lower/OpenACC/acc-loop-exit.f90    |  41 ++++
 flang/test/Lower/OpenACC/acc-unstructured.f90 | 175 ++++++++++++++++--
 3 files changed, 313 insertions(+), 22 deletions(-)
 create mode 100644 flang/test/Lower/OpenACC/acc-loop-exit.f90

diff --git a/flang/test/Lower/OpenACC/acc-cache.f90 b/flang/test/Lower/OpenACC/acc-cache.f90
index 62af4d311591d..eb32f3b704198 100644
--- a/flang/test/Lower/OpenACC/acc-cache.f90
+++ b/flang/test/Lower/OpenACC/acc-cache.f90
@@ -265,9 +265,58 @@ subroutine test_cache_2d_loop_vars()
 ! CHECK-NEXT: }
 end subroutine
 
-! Tests with independent loops + unstructured CFG (cache + EXIT/SELECT CASE)
-! live as TODO-style tests under
-! flang/test/Lower/OpenACC/Todo/acc-unstructured-loop-construct.f90.
+! CHECK-LABEL: func.func @_QPtest_cache_single_element()
+! Test cache with single element access: b(i)
+! This test includes an EXIT statement to verify cache scope with early loop exit
+subroutine test_cache_single_element()
+  integer, parameter :: n = 10
+  real, dimension(n) :: a, b
+  integer :: i
+
+  !$acc loop
+  do i = 1, n
+    !$acc cache(b(i))
+    a(i) = b(i)
+    if (a(i) > 100.0) exit
+  end do
+
+! Unstructured loop with EXIT: acc.loop becomes unstructured with cf.br/cf.cond_br
+! CHECK: acc.loop private({{.*}}) {
+! The privatized iterator is declared
+! CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_cache_single_elementEi"}
+! Loop control is done with cf.br/cf.cond_br in unstructured form
+! CHECK: cf.br ^[[HEADER:.*]]
+! CHECK: ^[[HEADER]]:
+! CHECK: cf.cond_br %{{.*}}, ^[[BODY:.*]], ^[[EXIT:.*]]
+! CHECK: ^[[BODY]]:
+! CHECK: %[[C1:.*]] = arith.constant 1 : index
+! Load iterator i for bounds computation
+! CHECK: %[[I_LOAD:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+! CHECK: %[[I_CVT1:.*]] = fir.convert %[[I_LOAD]] : (i32) -> i64
+! CHECK: %[[I_IDX:.*]] = fir.convert %[[I_CVT1]] : (i64) -> index
+! Compute lowerbound = i - 1 (single element: upperbound = lowerbound)
+! CHECK: %[[LB:.*]] = arith.subi %[[I_IDX]], %[[C1]] : index
+! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[LB]] : index) extent(%[[C1]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
+! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
+! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_single_elementEb"}
+! Loop body uses the cached single element
+! CHECK: hlfir.designate %[[DECL]]#0
+! CHECK: fir.load
+! CHECK: hlfir.assign
+! Unstructured control flow: EXIT generates conditional branch
+! CHECK: %[[CMP:.*]] = arith.cmpf ogt, %{{.*}}, %{{.*}} : f32
+! CHECK: cf.cond_br %[[CMP]], ^[[EXIT_BB:.*]], ^[[CONT_BB:.*]]
+! CHECK: ^[[EXIT_BB]]:
+! Early exit path: branch to acc.yield
+! CHECK: cf.br ^[[YIELD:.*]]
+! CHECK: ^[[CONT_BB]]:
+! Normal path: update iterator and loop back
+! CHECK: cf.br ^[[HEADER]]
+! CHECK: ^[[YIELD]]:
+! Scope termination: acc.yield marks end of cache scope
+! CHECK: acc.yield
+! CHECK-NEXT: } attributes {{{.*}}unstructured}
+end subroutine
 
 ! CHECK-LABEL: func.func @_QPtest_cache_mixed_bounds()
 ! Test cache with mixed constant and variable bounds: b(1:i)
@@ -316,6 +365,70 @@ subroutine test_cache_mixed_bounds()
 ! CHECK-NEXT: }
 end subroutine
 
+! CHECK-LABEL: func.func @_QPtest_cache_nonunit_lb()
+! Test cache with array that has non-1 lower bound: arr(10:20), cache(arr(15))
+! This test includes SELECT CASE for multi-way unstructured control flow
+subroutine test_cache_nonunit_lb()
+  integer :: arr(10:20)
+  integer :: i
+
+  !$acc loop
+  do i = 10, 20
+    !$acc cache(arr(15))
+    select case (mod(i, 3))
+    case (0)
+      arr(i) = i * 2
+    case (1)
+      arr(i) = i * 3
+    case default
+      arr(i) = i
+    end select
+  end do
+
+! For arr(10:20), startIdx = 10, element 15 has lowerbound = 15 - 10 = 5
+! CHECK: %[[C10:.*]] = arith.constant 10 : index
+! Unstructured loop with SELECT CASE: acc.loop becomes unstructured
+! CHECK: acc.loop private({{.*}}) {
+! CHECK: cf.br ^[[HEADER:.*]]
+! CHECK: ^[[HEADER]]:
+! CHECK: cf.cond_br %{{.*}}, ^[[BODY:.*]], ^[[EXIT:.*]]
+! CHECK: ^[[BODY]]:
+! Compute lowerbound = 15 - startIdx = 15 - 10 = 5
+! CHECK: %[[C1:.*]] = arith.constant 1 : index
+! CHECK: %[[C15:.*]] = arith.constant 15 : index
+! CHECK: %[[LB:.*]] = arith.subi %[[C15]], %{{.*}} : index
+! Single element: upperbound equals lowerbound, startIdx = 10
+! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[LB]] : index) extent(%[[C1]] : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index) {strideInBytes = true}
+! For non-unit lower bound arrays, acc.cache uses the box type from hlfir.declare
+! CHECK: %[[CACHE:.*]] = acc.cache var(%{{.*}} : !fir.box<!fir.array<11xi32>>) bounds(%[[BOUND]]) -> !fir.box<!fir.array<11xi32>> {{{.*}}name = "arr
+! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_nonunit_lbEarr"}
+! Unstructured control flow: SELECT CASE generates fir.select_case
+! CHECK: %[[MOD:.*]] = arith.remsi %{{.*}}, %{{.*}} : i32
+! CHECK: fir.select_case %[[MOD]] : i32 [#fir.point, %{{.*}}, ^[[CASE0:.*]], #fir.point, %{{.*}}, ^[[CASE1:.*]], unit, ^[[DEFAULT:.*]]]
+! Case 0: i * 2
+! CHECK: ^[[CASE0]]:
+! CHECK: hlfir.designate %[[DECL]]#0
+! CHECK: hlfir.assign
+! CHECK: cf.br ^[[MERGE:.*]]
+! Case 1: i * 3
+! CHECK: ^[[CASE1]]:
+! CHECK: hlfir.designate %[[DECL]]#0
+! CHECK: hlfir.assign
+! CHECK: cf.br ^[[MERGE]]
+! Default case: i
+! CHECK: ^[[DEFAULT]]:
+! CHECK: hlfir.designate %[[DECL]]#0
+! CHECK: hlfir.assign
+! CHECK: cf.br ^[[MERGE]]
+! All SELECT CASE branches converge, then loop back or exit
+! CHECK: ^[[MERGE]]:
+! CHECK: cf.br ^[[HEADER]]
+! CHECK: ^[[EXIT]]:
+! Scope termination: acc.yield marks end of cache scope
+! CHECK: acc.yield
+! CHECK-NEXT: } attributes {{{.*}}unstructured}
+end subroutine
+
 ! CHECK-LABEL: func.func @_QPtest_cache_use_after_region()
 ! CHECK: %[[B_VAR:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_cache_use_after_regionEb"}
 subroutine test_cache_use_after_region()
diff --git a/flang/test/Lower/OpenACC/acc-loop-exit.f90 b/flang/test/Lower/OpenACC/acc-loop-exit.f90
new file mode 100644
index 0000000000000..6ab215fdbd842
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-loop-exit.f90
@@ -0,0 +1,41 @@
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+subroutine sub1(x, a)
+  real :: x(200)
+  integer :: a
+
+  !$acc loop
+  do i = 100, 200
+    x(i) = 1.0
+    if (i == a) return
+  end do
+
+  i = 2
+end
+
+! CHECK-LABEL: func.func @_QPsub1
+! CHECK: %[[A:.*]]:2 = hlfir.declare %arg1 dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {uniq_name = "_QFsub1Ea"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[EXIT_COND:.*]] = acc.loop
+! CHECK:   %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: ^bb{{.*}}:
+! CHECK: ^bb{{.*}}:
+! CHECK:   %[[LOAD_I:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
+! CHECK:   %[[LOAD_I:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
+! CHECK:   %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
+! CHECK:   %[[CMP:.*]] = arith.cmpi eq, %[[LOAD_I]], %[[LOAD_A]] : i32
+! CHECK:   cf.cond_br %[[CMP]], ^[[EARLY_RET:.*]], ^[[NO_RET:.*]]
+! CHECK: ^[[EARLY_RET]]:
+! CHECK:   acc.yield %true : i1
+! CHECK: ^[[NO_RET]]:
+! CHECK:   cf.br ^bb{{.*}}
+! CHECK: ^bb{{.*}}:
+! CHECK:   acc.yield %false : i1
+! CHECK: }(i1)
+! CHECK: cf.cond_br %[[EXIT_COND]], ^[[EXIT_BLOCK:.*]], ^[[CONTINUE_BLOCK:.*]]
+! CHECK: ^[[CONTINUE_BLOCK]]:
+! CHECK:   hlfir.assign
+! CHECK:   cf.br ^[[EXIT_BLOCK]]
+! CHECK: ^[[EXIT_BLOCK]]:
+! CHECK:   return
+! CHECK: }
diff --git a/flang/test/Lower/OpenACC/acc-unstructured.f90 b/flang/test/Lower/OpenACC/acc-unstructured.f90
index 7fc7032f4442f..115626da9d160 100644
--- a/flang/test/Lower/OpenACC/acc-unstructured.f90
+++ b/flang/test/Lower/OpenACC/acc-unstructured.f90
@@ -39,11 +39,29 @@ subroutine test_unstructured1(a, b, c)
 ! CHECK: fir.call @_FortranAStopStatementText
 
 
-! Tests with independent loops + unstructured CFG (which currently hit the
-! NYI emitted in genACC for `acc loop` / combined constructs) live as
-! TODO-style tests under flang/test/Lower/OpenACC/Todo/:
-!   - Todo/acc-unstructured-combined-construct.f90  (`acc parallel loop` ...)
-!   - Todo/acc-unstructured-loop-construct.f90      (standalone `acc loop` ...)
+subroutine test_unstructured2(a, b, c)
+  integer :: i, j, k
+  real :: a(:,:,:), b(:,:,:), c(:,:,:)
+
+  !$acc serial loop
+  do i = 1, 10
+    do j = 1, 10
+      do k = 1, 10
+        if (a(1,2,3) > 10) stop 'just to be unstructured'
+      end do
+    end do
+  end do
+
+! CHECK-LABEL: func.func @_QPtest_unstructured2
+! CHECK: acc.serial
+! CHECK: acc.loop combined(serial) private(%{{.*}} : !fir.ref<i32>) {
+! CHECK: fir.call @_FortranAStopStatementText
+! CHECK: acc.yield
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+
+end subroutine
 
 subroutine test_unstructured3(a, b, c)
   integer :: i, j, k
@@ -120,6 +138,61 @@ subroutine test_unstructured5(a, n)
 ! CHECK: arith.cmpi eq
 ! CHECK: cf.cond_br
 
+! Test that GOTO exiting acc.loop (one level) generates acc.yield
+! instead of an invalid cross-region branch.
+subroutine test_unstructured6(N, A, B)
+  implicit real*8 (a-h, o-z)
+  !$acc routine seq
+  dimension A(*), B(*)
+  !$acc loop gang vector
+  do 100 i = 1, N
+  !$acc loop seq
+    do 10 j = 1, 1000
+      if (A(i) .gt. B(i)) goto 20
+10  continue
+20  B(i) = A(i)
+100 continue
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_unstructured6
+! CHECK: acc.loop gang vector
+! CHECK: acc.loop
+! CHECK: arith.cmpf ogt
+! CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref<i32>
+! CHECK: acc.yield
+! CHECK: } attributes {seq = [#acc.device_type<none>], unstructured}
+
+! Test GOTO exiting acc.loop with intermediate code between loop end and
+! target. A jump table (exit selector + dispatch) skips the intermediate code.
+subroutine test_unstructured7(A, B, C, N)
+  implicit real*8 (a-h, o-z)
+  !$acc routine seq
+  dimension A(*), B(*), C(*)
+  !$acc loop gang vector
+  do 100 i = 1, N
+  !$acc loop seq
+    do 10 j = 1, 1000
+      if (A(i) .gt. B(i)) goto 20
+10  continue
+    C(i) = 999.0
+20  B(i) = A(i)
+100 continue
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_unstructured7
+! CHECK: acc.loop gang vector
+! Inner loop stores exit selector and yields:
+! CHECK: acc.loop
+! CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref<i32>
+! CHECK: acc.yield
+! CHECK: } attributes {seq = [#acc.device_type<none>], unstructured}
+! Jump table after inner loop:
+! CHECK: fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: arith.cmpi eq
+! CHECK: cf.cond_br
+! Intermediate code on fall-through path:
+! CHECK: arith.constant 9.990000e+02
+
 ! Test GOTO exiting acc.data with intermediate code. Jump table dispatches
 ! after the acc.data op.
 subroutine test_unstructured8(a, n)
@@ -150,29 +223,93 @@ subroutine test_unstructured8(a, n)
 ! CHECK: arith.cmpi eq
 ! CHECK: cf.cond_br
 
-! The NYI for unstructured loops associated with an OpenACC loop/combined
-! directive only applies when the loop will be lowered as `independent`. The
-! tests below exercise the relaxed cases where the user has not promised
-! parallelism (seq/auto), so the lowering is expected to emit an `acc.loop`
-! with explicit unstructured CFG inside.
+! Test that `acc serial loop collapse(N)` whose body has an early-exit
+! (here, `if (cond) then ... cycle ... end if`) lowers cleanly. The
+! corresponding acc.loop must privatize all N induction variables, carry
+! both `collapse = [N]` and `unstructured` attributes, and emit the
+! iteration mechanics for all N levels as explicit cf inside the body.
+! Reproducer derived from lorado issue #2856.
+subroutine test_unstructured_collapse_cycle(a)
+  integer :: i, j, jdiag
+  real(8) :: a(:,:)
+  jdiag = 4
+  !$acc serial loop collapse(2) copy(a)
+  do j = 1, 8
+    do i = 1, 8
+      if (i == jdiag) then
+        a(i, j) = 0.0d0
+        cycle
+      end if
+      a(i, j) = real(i + j, 8)
+    end do
+  end do
+  !$acc end serial loop
+end subroutine
 
-! Combined `acc serial loop` (loop is `seq` by default) with STOP in body.
-subroutine test_unstructured_serial_loop_stop(a)
-  integer :: i, j
+! CHECK-LABEL: func.func @_QPtest_unstructured_collapse_cycle
+! CHECK: acc.serial combined(loop)
+! Both induction variables (j and i) are privatized:
+! CHECK: %[[PRIVJ:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK: %[[PRIVI:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "i"}
+! No control(...) on acc.loop — bounds are not on the op:
+! CHECK: acc.loop combined(serial) private(%[[PRIVJ]], %[[PRIVI]] : !fir.ref<i32>, !fir.ref<i32>) {
+! Outer loop trip-count test (j) emitted as cf:
+! CHECK: arith.cmpi sgt
+! CHECK: cf.cond_br
+! Inner loop trip-count test (i) emitted as cf:
+! CHECK: arith.cmpi sgt
+! CHECK: cf.cond_br
+! The if/cycle is emitted as an explicit cf conditional branch in the body:
+! CHECK: arith.cmpi eq
+! CHECK: cf.cond_br
+! CHECK: acc.yield
+! CHECK: }
+
+! Test that `acc serial loop collapse(N)` lowers cleanly when the early-exit
+! is a STOP (the form already covered for collapse=1 by test_unstructured2).
+subroutine test_unstructured_collapse_stop(a)
+  integer :: i, j, k
   real :: a(:,:,:)
-  !$acc serial loop
+  !$acc serial loop collapse(3)
   do i = 1, 10
     do j = 1, 10
-      if (a(1,2,3) > 10.0) stop 'unstructured'
+      do k = 1, 10
+        if (a(1,2,3) > 10) stop 'just to be unstructured'
+      end do
     end do
   end do
 end subroutine
 
-! CHECK-LABEL: func.func @_QPtest_unstructured_serial_loop_stop
-! CHECK: acc.serial combined(loop)
-! CHECK: acc.loop combined(serial)
+! CHECK-LABEL: func.func @_QPtest_unstructured_collapse_stop
+! All three IVs privatized:
+! CHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "k"}
+! CHECK: acc.loop combined(serial) private(%{{.*}}, %{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
 ! CHECK: fir.call @_FortranAStopStatementText
-! CHECK: } attributes {{{.*}}seq = [#acc.device_type<none>], unstructured}
+! CHECK: }
+
+! Test orphaned `acc loop collapse(N)` (no enclosing compute construct) whose body contains a `cycle`.
+subroutine test_unstructured_collapse_loop_only(a)
+  integer :: i, j, jdiag
+  real(8) :: a(:,:)
+  jdiag = 4
+  !$acc loop collapse(2)
+  do j = 1, 8
+    do i = 1, 8
+      if (i == jdiag) then
+        a(i, j) = 0.0d0
+        cycle
+      end if
+      a(i, j) = real(i + j, 8)
+    end do
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_unstructured_collapse_loop_only
+! Standalone acc.loop (no `combined(...)`):
+! CHECK: acc.loop private(%{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>], unstructured}
 
 ! Standalone `acc loop seq` with STOP in body (explicit `seq` clause).
 subroutine test_unstructured_loop_seq_stop(a)