[flang-commits] [flang] [OpenACC][flang] Emit NYI when unstructured loops are associated with OpenACC directives (PR #202948)
Kareem Ergawy via flang-commits
flang-commits at lists.llvm.org
Wed Jun 10 05:17:58 PDT 2026
https://github.com/ergawy created https://github.com/llvm/llvm-project/pull/202948
When an unstructured loop is associated with a loop or a combined directive, we emit an unstructured CFG for the loop's logic nested within the OpenACC op. This effectively serializes the nested loop on the device, which is not desirable. For now, emit a "not yet implemented" (NYI) error in these cases while working on a longer-term solution.
From 3ffc2cc29e676c7f9f80000a64fc11f643cf6086 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at gmail.com>
Date: Wed, 10 Jun 2026 03:54:26 -0700
Subject: [PATCH] [OpenACC][flang] Emit NYI when unstructured loops are
associated with OpenACC directives
When an unstructured loop is associated with a loop or a combined
directive, we emit an unstructured CFG for the loop's logic nested
within the OpenACC op. This effectively serializes the nested loop on
the device, which is not desirable. For now, emit a "not yet
implemented" (NYI) error in these cases while working on a longer-term
solution.
---
flang/lib/Lower/OpenACC.cpp | 7 +
.../Todo/acc-goto-multi-level-exit.f90 | 2 +-
.../acc-unstructured-combined-construct.f90 | 48 +++
flang/test/Lower/OpenACC/acc-cache.f90 | 237 ++++++------
flang/test/Lower/OpenACC/acc-loop-exit.f90 | 60 ++--
flang/test/Lower/OpenACC/acc-unstructured.f90 | 337 +++++++++---------
6 files changed, 384 insertions(+), 307 deletions(-)
create mode 100644 flang/test/Lower/OpenACC/Todo/acc-unstructured-combined-construct.f90
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 1c51cf7fa6ca5..d4b369639921f 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -2379,6 +2379,10 @@ genACC(Fortran::lower::AbstractConverter &converter,
std::get<Fortran::parser::AccClauseList>(beginLoopDirective.t);
const auto &outerDoConstruct =
std::get<std::optional<Fortran::parser::DoConstruct>>(loopConstruct.t);
+
+ if (outerDoConstruct.has_value() && eval.lowerAsUnstructured())
+ TODO(currentLocation, "unstructured do loop in acc loop construct");
+
auto loopOp = createLoopOp(converter, currentLocation, semanticsContext,
stmtCtx, *outerDoConstruct, eval, accClauseList,
/*combinedConstructs=*/{});
@@ -3148,6 +3152,9 @@ genACC(Fortran::lower::AbstractConverter &converter,
converter.genLocation(beginCombinedDirective.source);
Fortran::lower::StatementContext stmtCtx;
+ if (outerDoConstruct.has_value() && eval.lowerAsUnstructured())
+ TODO(currentLocation, "unstructured do loop in combined acc construct");
+
if (combinedDirective.v == llvm::acc::ACCD_kernels_loop) {
createComputeOp<mlir::acc::KernelsOp>(
converter, currentLocation, eval, semanticsContext, stmtCtx,
diff --git a/flang/test/Lower/OpenACC/Todo/acc-goto-multi-level-exit.f90 b/flang/test/Lower/OpenACC/Todo/acc-goto-multi-level-exit.f90
index 7f11b4a7b70e1..c4314571d96ba 100644
--- a/flang/test/Lower/OpenACC/Todo/acc-goto-multi-level-exit.f90
+++ b/flang/test/Lower/OpenACC/Todo/acc-goto-multi-level-exit.f90
@@ -41,4 +41,4 @@ subroutine nested_loop_exit(A, B, N)
200 continue
end subroutine
-! LOOP: not yet implemented: GOTO exiting OpenACC region
+! LOOP: not yet implemented: unstructured do loop in acc loop construct
diff --git a/flang/test/Lower/OpenACC/Todo/acc-unstructured-combined-construct.f90 b/flang/test/Lower/OpenACC/Todo/acc-unstructured-combined-construct.f90
new file mode 100644
index 0000000000000..1e0def8757c0b
--- /dev/null
+++ b/flang/test/Lower/OpenACC/Todo/acc-unstructured-combined-construct.f90
@@ -0,0 +1,48 @@
+! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %s -o - 2>&1 | FileCheck %s
+
+subroutine test_unstructured2(a, b, c)
+ integer :: i, j, k
+ real :: a(:,:,:), b(:,:,:), c(:,:,:)
+
+ !$acc parallel loop
+ do i = 1, 10
+ do j = 1, 10
+ do k = 1, 10
+ if (a(1,2,3) > 10) stop 'just to be unstructured'
+ end do
+ end do
+ end do
+
+end subroutine
+
+subroutine test_unstructured_collapse_cycle(a)
+ integer :: i, j, jdiag
+ real(8) :: a(:,:)
+ jdiag = 4
+ !$acc parallel loop collapse(2) copy(a)
+ do j = 1, 8
+ do i = 1, 8
+ if (i == jdiag) then
+ a(i, j) = 0.0d0
+ cycle
+ end if
+ a(i, j) = real(i + j, 8)
+ end do
+ end do
+ !$acc end parallel loop
+end subroutine
+
+subroutine test_unstructured_collapse_stop(a)
+ integer :: i, j, k
+ real :: a(:,:,:)
+ !$acc parallel loop collapse(3)
+ do i = 1, 10
+ do j = 1, 10
+ do k = 1, 10
+ if (a(1,2,3) > 10) stop 'just to be unstructured'
+ end do
+ end do
+ end do
+end subroutine
+
+! CHECK: not yet implemented: unstructured do loop in combined acc construct
diff --git a/flang/test/Lower/OpenACC/acc-cache.f90 b/flang/test/Lower/OpenACC/acc-cache.f90
index eb32f3b704198..923318fa0010b 100644
--- a/flang/test/Lower/OpenACC/acc-cache.f90
+++ b/flang/test/Lower/OpenACC/acc-cache.f90
@@ -265,58 +265,62 @@ subroutine test_cache_2d_loop_vars()
! CHECK-NEXT: }
end subroutine
-! CHECK-LABEL: func.func @_QPtest_cache_single_element()
-! Test cache with single element access: b(i)
-! This test includes an EXIT statement to verify cache scope with early loop exit
-subroutine test_cache_single_element()
- integer, parameter :: n = 10
- real, dimension(n) :: a, b
- integer :: i
-
- !$acc loop
- do i = 1, n
- !$acc cache(b(i))
- a(i) = b(i)
- if (a(i) > 100.0) exit
- end do
-
-! Unstructured loop with EXIT: acc.loop becomes unstructured with cf.br/cf.cond_br
-! CHECK: acc.loop private({{.*}}) {
-! The privatized iterator is declared
-! CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_cache_single_elementEi"}
-! Loop control is done with cf.br/cf.cond_br in unstructured form
-! CHECK: cf.br ^[[HEADER:.*]]
-! CHECK: ^[[HEADER]]:
-! CHECK: cf.cond_br %{{.*}}, ^[[BODY:.*]], ^[[EXIT:.*]]
-! CHECK: ^[[BODY]]:
-! CHECK: %[[C1:.*]] = arith.constant 1 : index
-! Load iterator i for bounds computation
-! CHECK: %[[I_LOAD:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
-! CHECK: %[[I_CVT1:.*]] = fir.convert %[[I_LOAD]] : (i32) -> i64
-! CHECK: %[[I_IDX:.*]] = fir.convert %[[I_CVT1]] : (i64) -> index
-! Compute lowerbound = i - 1 (single element: upperbound = lowerbound)
-! CHECK: %[[LB:.*]] = arith.subi %[[I_IDX]], %[[C1]] : index
-! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[LB]] : index) extent(%[[C1]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
-! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
-! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_single_elementEb"}
-! Loop body uses the cached single element
-! CHECK: hlfir.designate %[[DECL]]#0
-! CHECK: fir.load
-! CHECK: hlfir.assign
-! Unstructured control flow: EXIT generates conditional branch
-! CHECK: %[[CMP:.*]] = arith.cmpf ogt, %{{.*}}, %{{.*}} : f32
-! CHECK: cf.cond_br %[[CMP]], ^[[EXIT_BB:.*]], ^[[CONT_BB:.*]]
-! CHECK: ^[[EXIT_BB]]:
-! Early exit path: branch to acc.yield
-! CHECK: cf.br ^[[YIELD:.*]]
-! CHECK: ^[[CONT_BB]]:
-! Normal path: update iterator and loop back
-! CHECK: cf.br ^[[HEADER]]
-! CHECK: ^[[YIELD]]:
-! Scope termination: acc.yield marks end of cache scope
-! CHECK: acc.yield
-! CHECK-NEXT: } attributes {{{.*}}unstructured}
-end subroutine
+!! TODO: For now this is disabled because we emit an NYI if an unstructured loop is
+!! associated with an OpenACC directive.
+!!
+!! xCHECK-LABEL: func.func @_QPtest_cache_single_element()
+!! Test cache with single element access: b(i)
+!! This test includes an EXIT statement to verify cache scope with early loop exit
+!subroutine test_cache_single_element()
+! integer, parameter :: n = 10
+! real, dimension(n) :: a, b
+! integer :: i
+!
+! !$acc loop
+! do i = 1, n
+! !$acc cache(b(i))
+! a(i) = b(i)
+! if (a(i) > 100.0) exit
+! end do
+!
+!! Unstructured loop with EXIT: acc.loop becomes unstructured with cf.br/cf.cond_br
+!! xCHECK: acc.loop private({{.*}}) {
+!! The privatized iterator is declared
+!! xCHECK: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_cache_single_elementEi"}
+!! Loop control is done with cf.br/cf.cond_br in unstructured form
+!! xCHECK: cf.br ^[[HEADER:.*]]
+!! xCHECK: ^[[HEADER]]:
+!! xCHECK: cf.cond_br %{{.*}}, ^[[BODY:.*]], ^[[EXIT:.*]]
+!! xCHECK: ^[[BODY]]:
+!! xCHECK: %[[C1:.*]] = arith.constant 1 : index
+!! Load iterator i for bounds computation
+!! xCHECK: %[[I_LOAD:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+!! xCHECK: %[[I_CVT1:.*]] = fir.convert %[[I_LOAD]] : (i32) -> i64
+!! xCHECK: %[[I_IDX:.*]] = fir.convert %[[I_CVT1]] : (i64) -> index
+!! Compute lowerbound = i - 1 (single element: upperbound = lowerbound)
+!! xCHECK: %[[LB:.*]] = arith.subi %[[I_IDX]], %[[C1]] : index
+!! xCHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[LB]] : index) extent(%[[C1]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
+!! xCHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
+!! xCHECK: %[[DECL:.*]]:2 = hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_single_elementEb"}
+!! Loop body uses the cached single element
+!! xCHECK: hlfir.designate %[[DECL]]#0
+!! xCHECK: fir.load
+!! xCHECK: hlfir.assign
+!! Unstructured control flow: EXIT generates conditional branch
+!! xCHECK: %[[CMP:.*]] = arith.cmpf ogt, %{{.*}}, %{{.*}} : f32
+!! xCHECK: cf.cond_br %[[CMP]], ^[[EXIT_BB:.*]], ^[[CONT_BB:.*]]
+!! xCHECK: ^[[EXIT_BB]]:
+!! Early exit path: branch to acc.yield
+!! xCHECK: cf.br ^[[YIELD:.*]]
+!! xCHECK: ^[[CONT_BB]]:
+!! Normal path: update iterator and loop back
+!! xCHECK: cf.br ^[[HEADER]]
+!! xCHECK: ^[[YIELD]]:
+!! Scope termination: acc.yield marks end of cache scope
+!! xCHECK: acc.yield
+!! xCHECK-NEXT: } attributes {{{.*}}unstructured}
+!end subroutine
+!
! CHECK-LABEL: func.func @_QPtest_cache_mixed_bounds()
! Test cache with mixed constant and variable bounds: b(1:i)
@@ -365,69 +369,72 @@ subroutine test_cache_mixed_bounds()
! CHECK-NEXT: }
end subroutine
-! CHECK-LABEL: func.func @_QPtest_cache_nonunit_lb()
-! Test cache with array that has non-1 lower bound: arr(10:20), cache(arr(15))
-! This test includes SELECT CASE for multi-way unstructured control flow
-subroutine test_cache_nonunit_lb()
- integer :: arr(10:20)
- integer :: i
-
- !$acc loop
- do i = 10, 20
- !$acc cache(arr(15))
- select case (mod(i, 3))
- case (0)
- arr(i) = i * 2
- case (1)
- arr(i) = i * 3
- case default
- arr(i) = i
- end select
- end do
-
-! For arr(10:20), startIdx = 10, element 15 has lowerbound = 15 - 10 = 5
-! CHECK: %[[C10:.*]] = arith.constant 10 : index
-! Unstructured loop with SELECT CASE: acc.loop becomes unstructured
-! CHECK: acc.loop private({{.*}}) {
-! CHECK: cf.br ^[[HEADER:.*]]
-! CHECK: ^[[HEADER]]:
-! CHECK: cf.cond_br %{{.*}}, ^[[BODY:.*]], ^[[EXIT:.*]]
-! CHECK: ^[[BODY]]:
-! Compute lowerbound = 15 - startIdx = 15 - 10 = 5
-! CHECK: %[[C1:.*]] = arith.constant 1 : index
-! CHECK: %[[C15:.*]] = arith.constant 15 : index
-! CHECK: %[[LB:.*]] = arith.subi %[[C15]], %{{.*}} : index
-! Single element: upperbound equals lowerbound, startIdx = 10
-! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[LB]] : index) extent(%[[C1]] : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index) {strideInBytes = true}
-! For non-unit lower bound arrays, acc.cache uses the box type from hlfir.declare
-! CHECK: %[[CACHE:.*]] = acc.cache var(%{{.*}} : !fir.box<!fir.array<11xi32>>) bounds(%[[BOUND]]) -> !fir.box<!fir.array<11xi32>> {{{.*}}name = "arr
-! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_nonunit_lbEarr"}
-! Unstructured control flow: SELECT CASE generates fir.select_case
-! CHECK: %[[MOD:.*]] = arith.remsi %{{.*}}, %{{.*}} : i32
-! CHECK: fir.select_case %[[MOD]] : i32 [#fir.point, %{{.*}}, ^[[CASE0:.*]], #fir.point, %{{.*}}, ^[[CASE1:.*]], unit, ^[[DEFAULT:.*]]]
-! Case 0: i * 2
-! CHECK: ^[[CASE0]]:
-! CHECK: hlfir.designate %[[DECL]]#0
-! CHECK: hlfir.assign
-! CHECK: cf.br ^[[MERGE:.*]]
-! Case 1: i * 3
-! CHECK: ^[[CASE1]]:
-! CHECK: hlfir.designate %[[DECL]]#0
-! CHECK: hlfir.assign
-! CHECK: cf.br ^[[MERGE]]
-! Default case: i
-! CHECK: ^[[DEFAULT]]:
-! CHECK: hlfir.designate %[[DECL]]#0
-! CHECK: hlfir.assign
-! CHECK: cf.br ^[[MERGE]]
-! All SELECT CASE branches converge, then loop back or exit
-! CHECK: ^[[MERGE]]:
-! CHECK: cf.br ^[[HEADER]]
-! CHECK: ^[[EXIT]]:
-! Scope termination: acc.yield marks end of cache scope
-! CHECK: acc.yield
-! CHECK-NEXT: } attributes {{{.*}}unstructured}
-end subroutine
+!! TODO: For now this is disabled because we emit an NYI if an unstructured loop is
+!! associated with an OpenACC directive.
+!!
+!! xCHECK-LABEL: func.func @_QPtest_cache_nonunit_lb()
+!! Test cache with array that has non-1 lower bound: arr(10:20), cache(arr(15))
+!! This test includes SELECT CASE for multi-way unstructured control flow
+!subroutine test_cache_nonunit_lb()
+! integer :: arr(10:20)
+! integer :: i
+!
+! !$acc loop
+! do i = 10, 20
+! !$acc cache(arr(15))
+! select case (mod(i, 3))
+! case (0)
+! arr(i) = i * 2
+! case (1)
+! arr(i) = i * 3
+! case default
+! arr(i) = i
+! end select
+! end do
+!
+!! For arr(10:20), startIdx = 10, element 15 has lowerbound = 15 - 10 = 5
+!! xCHECK: %[[C10:.*]] = arith.constant 10 : index
+!! Unstructured loop with SELECT CASE: acc.loop becomes unstructured
+!! xCHECK: acc.loop private({{.*}}) {
+!! xCHECK: cf.br ^[[HEADER:.*]]
+!! xCHECK: ^[[HEADER]]:
+!! xCHECK: cf.cond_br %{{.*}}, ^[[BODY:.*]], ^[[EXIT:.*]]
+!! xCHECK: ^[[BODY]]:
+!! Compute lowerbound = 15 - startIdx = 15 - 10 = 5
+!! xCHECK: %[[C1:.*]] = arith.constant 1 : index
+!! xCHECK: %[[C15:.*]] = arith.constant 15 : index
+!! xCHECK: %[[LB:.*]] = arith.subi %[[C15]], %{{.*}} : index
+!! Single element: upperbound equals lowerbound, startIdx = 10
+!! xCHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[LB]] : index) extent(%[[C1]] : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index) {strideInBytes = true}
+!! For non-unit lower bound arrays, acc.cache uses the box type from hlfir.declare
+!! xCHECK: %[[CACHE:.*]] = acc.cache var(%{{.*}} : !fir.box<!fir.array<11xi32>>) bounds(%[[BOUND]]) -> !fir.box<!fir.array<11xi32>> {{{.*}}name = "arr
+!! xCHECK: %[[DECL:.*]]:2 = hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_nonunit_lbEarr"}
+!! Unstructured control flow: SELECT CASE generates fir.select_case
+!! xCHECK: %[[MOD:.*]] = arith.remsi %{{.*}}, %{{.*}} : i32
+!! xCHECK: fir.select_case %[[MOD]] : i32 [#fir.point, %{{.*}}, ^[[CASE0:.*]], #fir.point, %{{.*}}, ^[[CASE1:.*]], unit, ^[[DEFAULT:.*]]]
+!! Case 0: i * 2
+!! xCHECK: ^[[CASE0]]:
+!! xCHECK: hlfir.designate %[[DECL]]#0
+!! xCHECK: hlfir.assign
+!! xCHECK: cf.br ^[[MERGE:.*]]
+!! Case 1: i * 3
+!! xCHECK: ^[[CASE1]]:
+!! xCHECK: hlfir.designate %[[DECL]]#0
+!! xCHECK: hlfir.assign
+!! xCHECK: cf.br ^[[MERGE]]
+!! Default case: i
+!! xCHECK: ^[[DEFAULT]]:
+!! xCHECK: hlfir.designate %[[DECL]]#0
+!! xCHECK: hlfir.assign
+!! xCHECK: cf.br ^[[MERGE]]
+!! All SELECT CASE branches converge, then loop back or exit
+!! xCHECK: ^[[MERGE]]:
+!! xCHECK: cf.br ^[[HEADER]]
+!! xCHECK: ^[[EXIT]]:
+!! Scope termination: acc.yield marks end of cache scope
+!! xCHECK: acc.yield
+!! xCHECK-NEXT: } attributes {{{.*}}unstructured}
+!end subroutine
! CHECK-LABEL: func.func @_QPtest_cache_use_after_region()
! CHECK: %[[B_VAR:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_cache_use_after_regionEb"}
diff --git a/flang/test/Lower/OpenACC/acc-loop-exit.f90 b/flang/test/Lower/OpenACC/acc-loop-exit.f90
index 6ab215fdbd842..9f9e49e009c5b 100644
--- a/flang/test/Lower/OpenACC/acc-loop-exit.f90
+++ b/flang/test/Lower/OpenACC/acc-loop-exit.f90
@@ -1,4 +1,4 @@
-! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %s -o - 2>&1 | FileCheck %s
subroutine sub1(x, a)
real :: x(200)
@@ -13,29 +13,35 @@ subroutine sub1(x, a)
i = 2
end
-! CHECK-LABEL: func.func @_QPsub1
-! CHECK: %[[A:.*]]:2 = hlfir.declare %arg1 dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {uniq_name = "_QFsub1Ea"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[EXIT_COND:.*]] = acc.loop
-! CHECK: %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: ^bb{{.*}}:
-! CHECK: ^bb{{.*}}:
-! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
-! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
-! CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
-! CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[LOAD_I]], %[[LOAD_A]] : i32
-! CHECK: cf.cond_br %[[CMP]], ^[[EARLY_RET:.*]], ^[[NO_RET:.*]]
-! CHECK: ^[[EARLY_RET]]:
-! CHECK: acc.yield %true : i1
-! CHECK: ^[[NO_RET]]:
-! CHECK: cf.br ^bb{{.*}}
-! CHECK: ^bb{{.*}}:
-! CHECK: acc.yield %false : i1
-! CHECK: }(i1)
-! CHECK: cf.cond_br %[[EXIT_COND]], ^[[EXIT_BLOCK:.*]], ^[[CONTINUE_BLOCK:.*]]
-! CHECK: ^[[CONTINUE_BLOCK]]:
-! CHECK: hlfir.assign
-! CHECK: cf.br ^[[EXIT_BLOCK]]
-! CHECK: ^[[EXIT_BLOCK]]:
-! CHECK: return
-! CHECK: }
+! CHECK: not yet implemented: unstructured do loop in acc loop construct
+
+!! TODO: For now this is disabled because we emit an NYI if an unstructured loop is
+!! associated with an OpenACC directive.
+!!
+!! xCHECK-LABEL: func.func @_QPsub1
+!! xCHECK: %[[A:.*]]:2 = hlfir.declare %arg1 dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {uniq_name = "_QFsub1Ea"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+!! xCHECK: %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!! xCHECK: %[[EXIT_COND:.*]] = acc.loop
+!! xCHECK: %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!! xCHECK: ^bb{{.*}}:
+!! xCHECK: ^bb{{.*}}:
+!! xCHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
+!! xCHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
+!! xCHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
+!! xCHECK: %[[CMP:.*]] = arith.cmpi eq, %[[LOAD_I]], %[[LOAD_A]] : i32
+!! xCHECK: cf.cond_br %[[CMP]], ^[[EARLY_RET:.*]], ^[[NO_RET:.*]]
+!! xCHECK: ^[[EARLY_RET]]:
+!! xCHECK: acc.yield %true : i1
+!! xCHECK: ^[[NO_RET]]:
+!! xCHECK: cf.br ^bb{{.*}}
+!! xCHECK: ^bb{{.*}}:
+!! xCHECK: acc.yield %false : i1
+!! xCHECK: }(i1)
+!! xCHECK: cf.cond_br %[[EXIT_COND]], ^[[EXIT_BLOCK:.*]], ^[[CONTINUE_BLOCK:.*]]
+!! xCHECK: ^[[CONTINUE_BLOCK]]:
+!! xCHECK: hlfir.assign
+!! xCHECK: cf.br ^[[EXIT_BLOCK]]
+!! xCHECK: ^[[EXIT_BLOCK]]:
+!! xCHECK: return
+!! xCHECK: }
+
diff --git a/flang/test/Lower/OpenACC/acc-unstructured.f90 b/flang/test/Lower/OpenACC/acc-unstructured.f90
index ce58ae90bdc35..ed1536e5e73fa 100644
--- a/flang/test/Lower/OpenACC/acc-unstructured.f90
+++ b/flang/test/Lower/OpenACC/acc-unstructured.f90
@@ -39,29 +39,32 @@ subroutine test_unstructured1(a, b, c)
! CHECK: fir.call @_FortranAStopStatementText
-subroutine test_unstructured2(a, b, c)
- integer :: i, j, k
- real :: a(:,:,:), b(:,:,:), c(:,:,:)
-
- !$acc parallel loop
- do i = 1, 10
- do j = 1, 10
- do k = 1, 10
- if (a(1,2,3) > 10) stop 'just to be unstructured'
- end do
- end do
- end do
-
-! CHECK-LABEL: func.func @_QPtest_unstructured2
-! CHECK: acc.parallel
-! CHECK: acc.loop combined(parallel) private(%{{.*}} : !fir.ref<i32>) {
-! CHECK: fir.call @_FortranAStopStatementText
-! CHECK: acc.yield
-! CHECK: acc.yield
-! CHECK: } attributes {independent = [#acc.device_type<none>], unstructured}
-! CHECK: acc.yield
-
-end subroutine
+!! TODO: For now this is disabled because we emit an NYI if an unstructured loop is
+!! associated with an OpenACC directive.
+!!
+!subroutine test_unstructured2(a, b, c)
+! integer :: i, j, k
+! real :: a(:,:,:), b(:,:,:), c(:,:,:)
+!
+! !$acc parallel loop
+! do i = 1, 10
+! do j = 1, 10
+! do k = 1, 10
+! if (a(1,2,3) > 10) stop 'just to be unstructured'
+! end do
+! end do
+! end do
+!
+!! xCHECK-LABEL: func.func @_QPtest_unstructured2
+!! xCHECK: acc.parallel
+!! xCHECK: acc.loop combined(parallel) private(%{{.*}} : !fir.ref<i32>) {
+!! xCHECK: fir.call @_FortranAStopStatementText
+!! xCHECK: acc.yield
+!! xCHECK: acc.yield
+!! xCHECK: } attributes {independent = [#acc.device_type<none>], unstructured}
+!! xCHECK: acc.yield
+!
+!end subroutine
subroutine test_unstructured3(a, b, c)
integer :: i, j, k
@@ -138,60 +141,63 @@ subroutine test_unstructured5(a, n)
! CHECK: arith.cmpi eq
! CHECK: cf.cond_br
-! Test that GOTO exiting acc.loop (one level) generates acc.yield
-! instead of an invalid cross-region branch.
-subroutine test_unstructured6(N, A, B)
- implicit real*8 (a-h, o-z)
- !$acc routine gang
- dimension A(*), B(*)
- !$acc loop gang vector
- do 100 i = 1, N
- !$acc loop seq
- do 10 j = 1, 1000
- if (A(i) .gt. B(i)) goto 20
-10 continue
-20 B(i) = A(i)
-100 continue
-end subroutine
-
-! CHECK-LABEL: func.func @_QPtest_unstructured6
-! CHECK: acc.loop gang vector
-! CHECK: acc.loop
-! CHECK: arith.cmpf ogt
-! CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref<i32>
-! CHECK: acc.yield
-! CHECK: } attributes {seq = [#acc.device_type<none>], unstructured}
-
-! Test GOTO exiting acc.loop with intermediate code between loop end and
-! target. A jump table (exit selector + dispatch) skips the intermediate code.
-subroutine test_unstructured7(A, B, C, N)
- implicit real*8 (a-h, o-z)
- !$acc routine gang
- dimension A(*), B(*), C(*)
- !$acc loop gang vector
- do 100 i = 1, N
- !$acc loop seq
- do 10 j = 1, 1000
- if (A(i) .gt. B(i)) goto 20
-10 continue
- C(i) = 999.0
-20 B(i) = A(i)
-100 continue
-end subroutine
-
-! CHECK-LABEL: func.func @_QPtest_unstructured7
-! CHECK: acc.loop gang vector
-! Inner loop stores exit selector and yields:
-! CHECK: acc.loop
-! CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref<i32>
-! CHECK: acc.yield
-! CHECK: } attributes {seq = [#acc.device_type<none>], unstructured}
-! Jump table after inner loop:
-! CHECK: fir.load %{{.*}} : !fir.ref<i32>
-! CHECK: arith.cmpi eq
-! CHECK: cf.cond_br
-! Intermediate code on fall-through path:
-! CHECK: arith.constant 9.990000e+02
+!! TODO: For now this is disabled because we emit an NYI if an unstructured loop is
+!! associated with an OpenACC directive.
+!!
+!! Test that GOTO exiting acc.loop (one level) generates acc.yield
+!! instead of an invalid cross-region branch.
+!subroutine test_unstructured6(N, A, B)
+! implicit real*8 (a-h, o-z)
+! !$acc routine gang
+! dimension A(*), B(*)
+! !$acc loop gang vector
+! do 100 i = 1, N
+! !$acc loop seq
+! do 10 j = 1, 1000
+! if (A(i) .gt. B(i)) goto 20
+!10 continue
+!20 B(i) = A(i)
+!100 continue
+!end subroutine
+!
+!! xCHECK-LABEL: func.func @_QPtest_unstructured6
+!! xCHECK: acc.loop gang vector
+!! xCHECK: acc.loop
+!! xCHECK: arith.cmpf ogt
+!! xCHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref<i32>
+!! xCHECK: acc.yield
+!! xCHECK: } attributes {seq = [#acc.device_type<none>], unstructured}
+!
+!! Test GOTO exiting acc.loop with intermediate code between loop end and
+!! target. A jump table (exit selector + dispatch) skips the intermediate code.
+!subroutine test_unstructured7(A, B, C, N)
+! implicit real*8 (a-h, o-z)
+! !$acc routine gang
+! dimension A(*), B(*), C(*)
+! !$acc loop gang vector
+! do 100 i = 1, N
+! !$acc loop seq
+! do 10 j = 1, 1000
+! if (A(i) .gt. B(i)) goto 20
+!10 continue
+! C(i) = 999.0
+!20 B(i) = A(i)
+!100 continue
+!end subroutine
+!
+!! xCHECK-LABEL: func.func @_QPtest_unstructured7
+!! xCHECK: acc.loop gang vector
+!! Inner loop stores exit selector and yields:
+!! xCHECK: acc.loop
+!! xCHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref<i32>
+!! xCHECK: acc.yield
+!! xCHECK: } attributes {seq = [#acc.device_type<none>], unstructured}
+!! Jump table after inner loop:
+!! xCHECK: fir.load %{{.*}} : !fir.ref<i32>
+!! xCHECK: arith.cmpi eq
+!! xCHECK: cf.cond_br
+!! Intermediate code on fall-through path:
+!! xCHECK: arith.constant 9.990000e+02
! Test GOTO exiting acc.data with intermediate code. Jump table dispatches
! after the acc.data op.
@@ -223,90 +229,93 @@ subroutine test_unstructured8(a, n)
! CHECK: arith.cmpi eq
! CHECK: cf.cond_br
-! Test that `acc parallel loop collapse(N)` whose body has an early-exit
-! (here, `if (cond) then ... cycle ... end if`) lowers cleanly. The
-! corresponding acc.loop must privatize all N induction variables, carry
-! both `collapse = [N]` and `unstructured` attributes, and emit the
-! iteration mechanics for all N levels as explicit cf inside the body.
-! Reproducer derived from lorado issue #2856.
-subroutine test_unstructured_collapse_cycle(a)
- integer :: i, j, jdiag
- real(8) :: a(:,:)
- jdiag = 4
- !$acc parallel loop collapse(2) copy(a)
- do j = 1, 8
- do i = 1, 8
- if (i == jdiag) then
- a(i, j) = 0.0d0
- cycle
- end if
- a(i, j) = real(i + j, 8)
- end do
- end do
- !$acc end parallel loop
-end subroutine
-
-! CHECK-LABEL: func.func @_QPtest_unstructured_collapse_cycle
-! CHECK: acc.parallel combined(loop)
-! Both induction variables (j and i) are privatized:
-! CHECK: %[[PRIVJ:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "j"}
-! CHECK: %[[PRIVI:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "i"}
-! No control(...) on acc.loop — bounds are not on the op:
-! CHECK: acc.loop combined(parallel) private(%[[PRIVJ]], %[[PRIVI]] : !fir.ref<i32>, !fir.ref<i32>) {
-! Outer loop trip-count test (j) emitted as cf:
-! CHECK: arith.cmpi sgt
-! CHECK: cf.cond_br
-! Inner loop trip-count test (i) emitted as cf:
-! CHECK: arith.cmpi sgt
-! CHECK: cf.cond_br
-! The if/cycle is a structured cf branch in the body:
-! CHECK: arith.cmpi eq
-! CHECK: cf.cond_br
-! CHECK: acc.yield
-! CHECK: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>], unstructured}
-
-! Test that `acc parallel loop collapse(N)` lowers cleanly when the early-exit
-! is a STOP (the form already covered for collapse=1 by test_unstructured2).
-subroutine test_unstructured_collapse_stop(a)
- integer :: i, j, k
- real :: a(:,:,:)
- !$acc parallel loop collapse(3)
- do i = 1, 10
- do j = 1, 10
- do k = 1, 10
- if (a(1,2,3) > 10) stop 'just to be unstructured'
- end do
- end do
- end do
-end subroutine
-
-! CHECK-LABEL: func.func @_QPtest_unstructured_collapse_stop
-! All three IVs privatized:
-! CHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "j"}
-! CHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "k"}
-! CHECK: acc.loop combined(parallel) private(%{{.*}}, %{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
-! CHECK: fir.call @_FortranAStopStatementText
-! CHECK: } attributes {collapse = [3], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>], unstructured}
-
-! Test orphaned `acc loop collapse(N)`
-subroutine test_unstructured_collapse_loop_only(a)
- integer :: i, j, jdiag
- real(8) :: a(:,:)
- jdiag = 4
- !$acc loop collapse(2)
- do j = 1, 8
- do i = 1, 8
- if (i == jdiag) then
- a(i, j) = 0.0d0
- cycle
- end if
- a(i, j) = real(i + j, 8)
- end do
- end do
-end subroutine
-
-! CHECK-LABEL: func.func @_QPtest_unstructured_collapse_loop_only
-! Standalone acc.loop (no `combined(...)`):
-! CHECK: acc.loop private(%{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>) {
-! CHECK: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>], unstructured}
+!! TODO: For now this is disabled because we emit an NYI if an unstructured loop is
+!! associated with an OpenACC directive.
+!!
+!! Test that `acc parallel loop collapse(N)` whose body has an early-exit
+!! (here, `if (cond) then ... cycle ... end if`) lowers cleanly. The
+!! corresponding acc.loop must privatize all N induction variables, carry
+!! both `collapse = [N]` and `unstructured` attributes, and emit the
+!! iteration mechanics for all N levels as explicit cf inside the body.
+!! Reproducer derived from lorado issue #2856.
+!subroutine test_unstructured_collapse_cycle(a)
+! integer :: i, j, jdiag
+! real(8) :: a(:,:)
+! jdiag = 4
+! !$acc parallel loop collapse(2) copy(a)
+! do j = 1, 8
+! do i = 1, 8
+! if (i == jdiag) then
+! a(i, j) = 0.0d0
+! cycle
+! end if
+! a(i, j) = real(i + j, 8)
+! end do
+! end do
+! !$acc end parallel loop
+!end subroutine
+!
+!! xCHECK-LABEL: func.func @_QPtest_unstructured_collapse_cycle
+!! xCHECK: acc.parallel combined(loop)
+!! Both induction variables (j and i) are privatized:
+!! xCHECK: %[[PRIVJ:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "j"}
+!! xCHECK: %[[PRIVI:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "i"}
+!! No control(...) on acc.loop — bounds are not on the op:
+!! xCHECK: acc.loop combined(parallel) private(%[[PRIVJ]], %[[PRIVI]] : !fir.ref<i32>, !fir.ref<i32>) {
+!! Outer loop trip-count test (j) emitted as cf:
+!! xCHECK: arith.cmpi sgt
+!! xCHECK: cf.cond_br
+!! Inner loop trip-count test (i) emitted as cf:
+!! xCHECK: arith.cmpi sgt
+!! xCHECK: cf.cond_br
+!! The if/cycle is a structured cf branch in the body:
+!! xCHECK: arith.cmpi eq
+!! xCHECK: cf.cond_br
+!! xCHECK: acc.yield
+!! xCHECK: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>], unstructured}
+!
+!! Test that `acc parallel loop collapse(N)` lowers cleanly when the early-exit
+!! is a STOP (the form already covered for collapse=1 by test_unstructured2).
+!subroutine test_unstructured_collapse_stop(a)
+! integer :: i, j, k
+! real :: a(:,:,:)
+! !$acc parallel loop collapse(3)
+! do i = 1, 10
+! do j = 1, 10
+! do k = 1, 10
+! if (a(1,2,3) > 10) stop 'just to be unstructured'
+! end do
+! end do
+! end do
+!end subroutine
+!
+!! xCHECK-LABEL: func.func @_QPtest_unstructured_collapse_stop
+!! All three IVs privatized:
+!! xCHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "i"}
+!! xCHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "j"}
+!! xCHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "k"}
+!! xCHECK: acc.loop combined(parallel) private(%{{.*}}, %{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
+!! xCHECK: fir.call @_FortranAStopStatementText
+!! xCHECK: } attributes {collapse = [3], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>], unstructured}
+!
+!! Test orphaned `acc loop collapse(N)`
+!subroutine test_unstructured_collapse_loop_only(a)
+! integer :: i, j, jdiag
+! real(8) :: a(:,:)
+! jdiag = 4
+! !$acc loop collapse(2)
+! do j = 1, 8
+! do i = 1, 8
+! if (i == jdiag) then
+! a(i, j) = 0.0d0
+! cycle
+! end if
+! a(i, j) = real(i + j, 8)
+! end do
+! end do
+!end subroutine
+!
+!! xCHECK-LABEL: func.func @_QPtest_unstructured_collapse_loop_only
+!! Standalone acc.loop (no `combined(...)`):
+!! xCHECK: acc.loop private(%{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>) {
+!! xCHECK: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>], unstructured}
More information about the flang-commits
mailing list