[llvm-branch-commits] [flang] [flang][OpenMP] `do concurrent` to device mapping lit tests (PR #155992)
Kareem Ergawy via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Sep 1 22:25:59 PDT 2025
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/155992
>From 77181e62b5b28424f0bbaad96cbc9820c9fadc53 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Fri, 29 Aug 2025 03:53:51 -0500
Subject: [PATCH] [flang][OpenMP] `do concurrent` to device mapping lit tests
Adds more lit tests for `do concurrent` device mapping.
---
.../Transforms/DoConcurrent/allocatable.f90 | 29 +++++
.../Transforms/DoConcurrent/host_eval.f90 | 63 +++++++++++
.../DoConcurrent/locally_destroyed_temp.f90 | 43 ++++---
.../DoConcurrent/map_shape_info.f90 | 104 +++++++++++++++++
.../multiple_iteration_ranges.f90 | 106 +++++++++++-------
.../DoConcurrent/non_reference_to_device.f90 | 34 ++++++
.../DoConcurrent/not_perfectly_nested.f90 | 66 +++++++----
.../DoConcurrent/runtime_sized_array.f90 | 42 +++++++
.../DoConcurrent/skip_all_nested_loops.f90 | 68 +++++++++++
9 files changed, 478 insertions(+), 77 deletions(-)
create mode 100644 flang/test/Transforms/DoConcurrent/allocatable.f90
create mode 100644 flang/test/Transforms/DoConcurrent/host_eval.f90
create mode 100644 flang/test/Transforms/DoConcurrent/map_shape_info.f90
create mode 100644 flang/test/Transforms/DoConcurrent/non_reference_to_device.f90
create mode 100644 flang/test/Transforms/DoConcurrent/runtime_sized_array.f90
create mode 100644 flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90
diff --git a/flang/test/Transforms/DoConcurrent/allocatable.f90 b/flang/test/Transforms/DoConcurrent/allocatable.f90
new file mode 100644
index 0000000000000..03962f150eb95
--- /dev/null
+++ b/flang/test/Transforms/DoConcurrent/allocatable.f90
@@ -0,0 +1,29 @@
+! Verifies that proper `omp.map.bounds` ops are emitted when an allocatable is
+! implicitly mapped by a `do concurrent` loop.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s
+program main
+ implicit none
+
+ integer,parameter :: n = 1000000
+ real, allocatable, dimension(:) :: y
+ integer :: i
+
+ allocate(y(1:n))
+
+ do concurrent(i=1:n)
+ y(i) = 42
+ end do
+
+ deallocate(y)
+end program main
+
+! CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEy"}
+! CHECK: %[[Y_VAL:.*]] = fir.load %[[Y_DECL]]#0
+! CHECK: %[[Y_DIM0:.*]]:3 = fir.box_dims %[[Y_VAL]], %{{c0_.*}}
+! CHECK: %[[Y_LB:.*]] = arith.constant 0 : index
+! CHECK: %[[Y_UB:.*]] = arith.subi %[[Y_DIM0]]#1, %{{c1_.*}} : index
+! CHECK: %[[Y_BOUNDS:.*]] = omp.map.bounds lower_bound(%[[Y_LB]] : index) upper_bound(%[[Y_UB]] : index) extent(%[[Y_DIM0]]#1 : index)
+! CHECK: %[[MEM_MAP:.*]] = omp.map.info {{.*}} bounds(%[[Y_BOUNDS]])
+! CHECK: omp.map.info var_ptr(%[[Y_DECL]]#1 : {{.*}}) {{.*}} members(%[[MEM_MAP]] : {{.*}})
diff --git a/flang/test/Transforms/DoConcurrent/host_eval.f90 b/flang/test/Transforms/DoConcurrent/host_eval.f90
new file mode 100644
index 0000000000000..7d16a91ae6941
--- /dev/null
+++ b/flang/test/Transforms/DoConcurrent/host_eval.f90
@@ -0,0 +1,63 @@
+! Tests `host_eval` clause code-gen and loop nest bounds on host vs. device.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
+! RUN: -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s --check-prefix=HOST -vv
+
+! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp \
+! RUN: -fopenmp-is-target-device -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s --check-prefix=DEVICE
+
+program do_concurrent_host_eval
+ implicit none
+ integer :: i, j
+
+ do concurrent (i=1:10, j=1:20)
+ end do
+end program do_concurrent_host_eval
+
+! HOST: omp.target host_eval(
+! HOST-SAME: %{{[^[:space:]]+}} -> %[[I_LB:[^,]+]],
+! HOST-SAME: %{{[^[:space:]]+}} -> %[[I_UB:[^,]+]],
+! HOST-SAME: %{{[^[:space:]]+}} -> %[[I_ST:[^,]+]],
+! HOST-SAME: %{{[^[:space:]]+}} -> %[[J_LB:[^,]+]],
+! HOST-SAME: %{{[^[:space:]]+}} -> %[[J_UB:[^,]+]],
+! HOST-SAME: %{{[^[:space:]]+}} -> %[[J_ST:[^,]+]] : {{.*}}) map_entries
+
+! HOST: omp.loop_nest ({{.*}}, {{.*}}) : index = (%[[I_LB]], %[[J_LB]]) to
+! HOST-SAME: (%[[I_UB]], %[[J_UB]]) inclusive step
+! HOST-SAME: (%[[I_ST]], %[[J_ST]])
+
+! DEVICE: omp.target map_entries(
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_LB_MAP:[^,]+]],
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_UB_MAP:[^,]+]],
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_ST_MAP:[^,]+]],
+
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_LB_MAP:[^,]+]],
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_UB_MAP:[^,]+]],
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ST_MAP:[^,]+]],
+
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}} : {{.*}})
+
+! DEVICE: %[[I_LB_DECL:.*]]:2 = hlfir.declare %[[I_LB_MAP]]
+! DEVICE: %[[I_LB:.*]] = fir.load %[[I_LB_DECL]]#1 : !fir.ref<index>
+
+! DEVICE: %[[I_UB_DECL:.*]]:2 = hlfir.declare %[[I_UB_MAP]]
+! DEVICE: %[[I_UB:.*]] = fir.load %[[I_UB_DECL]]#1 : !fir.ref<index>
+
+! DEVICE: %[[I_ST_DECL:.*]]:2 = hlfir.declare %[[I_ST_MAP]]
+! DEVICE: %[[I_ST:.*]] = fir.load %[[I_ST_DECL]]#1 : !fir.ref<index>
+
+! DEVICE: %[[J_LB_DECL:.*]]:2 = hlfir.declare %[[J_LB_MAP]]
+! DEVICE: %[[J_LB:.*]] = fir.load %[[J_LB_DECL]]#1 : !fir.ref<index>
+
+! DEVICE: %[[J_UB_DECL:.*]]:2 = hlfir.declare %[[J_UB_MAP]]
+! DEVICE: %[[J_UB:.*]] = fir.load %[[J_UB_DECL]]#1 : !fir.ref<index>
+
+! DEVICE: %[[J_ST_DECL:.*]]:2 = hlfir.declare %[[J_ST_MAP]]
+! DEVICE: %[[J_ST:.*]] = fir.load %[[J_ST_DECL]]#1 : !fir.ref<index>
+
+! DEVICE: omp.loop_nest ({{.*}}, {{.*}}) : index = (%[[I_LB]], %[[J_LB]]) to
+! DEVICE-SAME: (%[[I_UB]], %[[J_UB]]) inclusive step
+! DEVICE-SAME: (%[[I_ST]], %[[J_ST]])
diff --git a/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 b/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90
index f82696669eca6..28429cebf8587 100644
--- a/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90
+++ b/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90
@@ -1,9 +1,14 @@
+! Fails until we update the pass to use the `fir.do_concurrent` op.
+
! Tests that "loop-local values" are properly handled by localizing them to the
! body of the loop nest. See `collectLoopLocalValues` and `localizeLoopLocalValue`
! for a definition of "loop-local values" and how they are handled.
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \
-! RUN: | FileCheck %s
+! RUN: | FileCheck %s --check-prefixes=COMMON
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s --check-prefixes=COMMON,DEVICE
module struct_mod
type test_struct
integer, allocatable :: x_
@@ -46,17 +51,25 @@ program main
print *, "total =", total
end program main
-! CHECK: omp.parallel {
-! CHECK: %[[LOCAL_TEMP:.*]] = fir.alloca !fir.type<_QMstruct_modTtest_struct{x_:!fir.box<!fir.heap<i32>>}> {bindc_name = ".result"}
-! CHECK: omp.wsloop {
-! CHECK: omp.loop_nest {{.*}} {
-! CHECK: %[[TEMP_VAL:.*]] = fir.call @_QMstruct_modPconstruct_from_components
-! CHECK: fir.save_result %[[TEMP_VAL]] to %[[LOCAL_TEMP]]
-! CHECK: %[[EMBOXED_LOCAL:.*]] = fir.embox %[[LOCAL_TEMP]]
-! CHECK: %[[CONVERTED_LOCAL:.*]] = fir.convert %[[EMBOXED_LOCAL]]
-! CHECK: fir.call @_FortranADestroy(%[[CONVERTED_LOCAL]])
-! CHECK: omp.yield
-! CHECK: }
-! CHECK: }
-! CHECK: omp.terminator
-! CHECK: }
+! DEVICE: omp.target {{.*}} {
+! DEVICE: omp.teams {
+! COMMON: omp.parallel {
+! COMMON: %[[LOCAL_TEMP:.*]] = fir.alloca !fir.type<_QMstruct_modTtest_struct{x_:!fir.box<!fir.heap<i32>>}> {bindc_name = ".result"}
+! DEVICE: omp.distribute {
+! COMMON: omp.wsloop {
+! COMMON: omp.loop_nest {{.*}} {
+! COMMON: %[[TEMP_VAL:.*]] = fir.call @_QMstruct_modPconstruct_from_components
+! COMMON: fir.save_result %[[TEMP_VAL]] to %[[LOCAL_TEMP]]
+! COMMON: %[[EMBOXED_LOCAL:.*]] = fir.embox %[[LOCAL_TEMP]]
+! COMMON: %[[CONVERTED_LOCAL:.*]] = fir.convert %[[EMBOXED_LOCAL]]
+! COMMON: fir.call @_FortranADestroy(%[[CONVERTED_LOCAL]])
+! COMMON: omp.yield
+! COMMON: }
+! COMMON: }
+! DEVICE: }
+! COMMON: omp.terminator
+! COMMON: }
+! DEVICE: omp.terminator
+! DEVICE: }
+! DEVICE: omp.terminator
+! DEVICE: }
diff --git a/flang/test/Transforms/DoConcurrent/map_shape_info.f90 b/flang/test/Transforms/DoConcurrent/map_shape_info.f90
new file mode 100644
index 0000000000000..3dca1340ae6b9
--- /dev/null
+++ b/flang/test/Transforms/DoConcurrent/map_shape_info.f90
@@ -0,0 +1,104 @@
+! Tests mapping of a basic `do concurrent` loop to `!$omp target teams distribute
+! parallel do`; checks that array shape info (extents, start indices) is mapped.
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s
+
+program do_concurrent_shape
+ implicit none
+ integer :: a(10, 20)
+ integer :: i, j
+
+ do concurrent (i=1:10, j=1:20)
+ a(i, j) = i * j
+ end do
+end program do_concurrent_shape
+
+! CHECK: fir.store %{{c10.*}} to %[[DIM0_EXT:.*]] : !fir.ref<index>
+! CHECK: fir.store %{{c20.*}} to %[[DIM1_EXT:.*]] : !fir.ref<index>
+
+! CHECK: omp.map.info
+! CHECK: omp.map.info
+! CHECK: omp.map.info
+
+! CHECK: omp.map.info
+! CHECK: omp.map.info
+! CHECK: omp.map.info
+
+! CHECK: omp.map.info
+! CHECK: omp.map.info
+! CHECK: omp.map.info
+
+! CHECK: %[[DIM0_EXT_MAP:.*]] = omp.map.info
+! CHECK-SAME: var_ptr(%[[DIM0_EXT]] : !fir.ref<index>, index)
+! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc)
+! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = "_QFEa.extent.dim0"}
+
+! CHECK: %[[DIM1_EXT_MAP:.*]] = omp.map.info
+! CHECK-SAME: var_ptr(%[[DIM1_EXT]] : !fir.ref<index>, index)
+! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc)
+! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = "_QFEa.extent.dim1"}
+
+! CHECK: omp.target host_eval({{.*}}) map_entries(
+! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %[[DIM0_EXT_MAP]] -> %[[DIM0_EXT_ARG:[^,]+]],
+! CHECK-SAME: %[[DIM1_EXT_MAP]] -> %[[DIM1_EXT_ARG:[^,]+]] : {{.*}})
+
+! CHECK-DAG: %[[DIM0_EXT_DEV:.*]] = fir.load %[[DIM0_EXT_ARG]]
+! CHECK-DAG: %[[DIM1_EXT_DEV:.*]] = fir.load %[[DIM1_EXT_ARG]]
+
+! CHECK: %[[SHAPE:.*]] = fir.shape %[[DIM0_EXT_DEV]], %[[DIM1_EXT_DEV]]
+! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}}(%[[SHAPE]]) {uniq_name = "_QFEa"}
+
+subroutine do_concurrent_shape_shift
+ implicit none
+ integer :: a(2:10)
+ integer :: i
+
+ do concurrent (i=1:10)
+ a(i) = i
+ end do
+end subroutine do_concurrent_shape_shift
+
+! CHECK: fir.store %{{c2.*}} to %[[DIM0_STRT:.*]] : !fir.ref<index>
+! CHECK: fir.store %{{c9.*}} to %[[DIM0_EXT:.*]] : !fir.ref<index>
+
+! CHECK: omp.map.info
+! CHECK: omp.map.info
+! CHECK: omp.map.info
+
+! CHECK: omp.map.info
+! CHECK: omp.map.info
+
+! CHECK: %[[DIM0_STRT_MAP:.*]] = omp.map.info
+! CHECK-SAME: var_ptr(%[[DIM0_STRT]] : !fir.ref<index>, index)
+! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc)
+! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = "_QF{{.*}}Ea.start_idx.dim0"}
+
+! CHECK: %[[DIM0_EXT_MAP:.*]] = omp.map.info
+! CHECK-SAME: var_ptr(%[[DIM0_EXT]] : !fir.ref<index>, index)
+! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc)
+! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = "_QF{{.*}}Ea.extent.dim0"}
+
+! CHECK: omp.target host_eval({{.*}}) map_entries(
+! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %[[DIM0_STRT_MAP]] -> %[[DIM0_STRT_ARG:[^,]+]],
+! CHECK-SAME: %[[DIM0_EXT_MAP]] -> %[[DIM0_EXT_ARG:[^,]+]] : {{.*}})
+
+! CHECK-DAG: %[[DIM0_STRT_DEV:.*]] = fir.load %[[DIM0_STRT_ARG]]
+! CHECK-DAG: %[[DIM0_EXT_DEV:.*]] = fir.load %[[DIM0_EXT_ARG]]
+
+! CHECK: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[DIM0_STRT_DEV]], %[[DIM0_EXT_DEV]]
+! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}}(%[[SHAPE_SHIFT]]) {uniq_name = "_QF{{.*}}Ea"}
+
diff --git a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90
index d0210726de83e..3ea32f9f4cecc 100644
--- a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90
+++ b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90
@@ -1,9 +1,14 @@
+! Fails until we update the pass to use the `fir.do_concurrent` op.
+
! Tests mapping of a `do concurrent` loop with multiple iteration ranges.
! RUN: split-file %s %t
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %t/multi_range.f90 -o - \
-! RUN: | FileCheck %s
+! RUN: | FileCheck %s --check-prefixes=HOST,COMMON
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %t/multi_range.f90 -o - \
+! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON
!--- multi_range.f90
program main
@@ -17,56 +22,75 @@ program main
end do
end
-! CHECK: func.func @_QQmain
+! COMMON: func.func @_QQmain
+
+! COMMON: %[[C3:.*]] = arith.constant 3 : i32
+! COMMON: %[[LB_I:.*]] = fir.convert %[[C3]] : (i32) -> index
+! COMMON: %[[C20:.*]] = arith.constant 20 : i32
+! COMMON: %[[UB_I:.*]] = fir.convert %[[C20]] : (i32) -> index
+! COMMON: %[[STEP_I:.*]] = arith.constant 1 : index
+
+! COMMON: %[[C5:.*]] = arith.constant 5 : i32
+! COMMON: %[[LB_J:.*]] = fir.convert %[[C5]] : (i32) -> index
+! COMMON: %[[C40:.*]] = arith.constant 40 : i32
+! COMMON: %[[UB_J:.*]] = fir.convert %[[C40]] : (i32) -> index
+! COMMON: %[[STEP_J:.*]] = arith.constant 1 : index
+
+! COMMON: %[[C7:.*]] = arith.constant 7 : i32
+! COMMON: %[[LB_K:.*]] = fir.convert %[[C7]] : (i32) -> index
+! COMMON: %[[C60:.*]] = arith.constant 60 : i32
+! COMMON: %[[UB_K:.*]] = fir.convert %[[C60]] : (i32) -> index
+! COMMON: %[[STEP_K:.*]] = arith.constant 1 : index
+
+! DEVICE: omp.target host_eval(
+! DEVICE-SAME: %[[LB_I]] -> %[[LB_I:[[:alnum:]]+]],
+! DEVICE-SAME: %[[UB_I]] -> %[[UB_I:[[:alnum:]]+]],
+! DEVICE-SAME: %[[STEP_I]] -> %[[STEP_I:[[:alnum:]]+]],
+! DEVICE-SAME: %[[LB_J]] -> %[[LB_J:[[:alnum:]]+]],
+! DEVICE-SAME: %[[UB_J]] -> %[[UB_J:[[:alnum:]]+]],
+! DEVICE-SAME: %[[STEP_J]] -> %[[STEP_J:[[:alnum:]]+]],
+! DEVICE-SAME: %[[LB_K]] -> %[[LB_K:[[:alnum:]]+]],
+! DEVICE-SAME: %[[UB_K]] -> %[[UB_K:[[:alnum:]]+]],
+! DEVICE-SAME: %[[STEP_K]] -> %[[STEP_K:[[:alnum:]]+]] :
+! DEVICE-SAME: index, index, index, index, index, index, index, index, index)
-! CHECK: %[[C3:.*]] = arith.constant 3 : i32
-! CHECK: %[[LB_I:.*]] = fir.convert %[[C3]] : (i32) -> index
-! CHECK: %[[C20:.*]] = arith.constant 20 : i32
-! CHECK: %[[UB_I:.*]] = fir.convert %[[C20]] : (i32) -> index
-! CHECK: %[[STEP_I:.*]] = arith.constant 1 : index
+! DEVICE: omp.teams
-! CHECK: %[[C5:.*]] = arith.constant 5 : i32
-! CHECK: %[[LB_J:.*]] = fir.convert %[[C5]] : (i32) -> index
-! CHECK: %[[C40:.*]] = arith.constant 40 : i32
-! CHECK: %[[UB_J:.*]] = fir.convert %[[C40]] : (i32) -> index
-! CHECK: %[[STEP_J:.*]] = arith.constant 1 : index
+! HOST-NOT: omp.target
+! HOST-NOT: omp.teams
-! CHECK: %[[C7:.*]] = arith.constant 7 : i32
-! CHECK: %[[LB_K:.*]] = fir.convert %[[C7]] : (i32) -> index
-! CHECK: %[[C60:.*]] = arith.constant 60 : i32
-! CHECK: %[[UB_K:.*]] = fir.convert %[[C60]] : (i32) -> index
-! CHECK: %[[STEP_K:.*]] = arith.constant 1 : index
+! COMMON: omp.parallel {
-! CHECK: omp.parallel {
+! COMMON-NEXT: %[[ITER_VAR_I:.*]] = fir.alloca i32 {bindc_name = "i"}
+! COMMON-NEXT: %[[BINDING_I:.*]]:2 = hlfir.declare %[[ITER_VAR_I]] {uniq_name = "_QFEi"}
-! CHECK-NEXT: %[[ITER_VAR_I:.*]] = fir.alloca i32 {bindc_name = "i"}
-! CHECK-NEXT: %[[BINDING_I:.*]]:2 = hlfir.declare %[[ITER_VAR_I]] {uniq_name = "_QFEi"}
+! COMMON-NEXT: %[[ITER_VAR_J:.*]] = fir.alloca i32 {bindc_name = "j"}
+! COMMON-NEXT: %[[BINDING_J:.*]]:2 = hlfir.declare %[[ITER_VAR_J]] {uniq_name = "_QFEj"}
-! CHECK-NEXT: %[[ITER_VAR_J:.*]] = fir.alloca i32 {bindc_name = "j"}
-! CHECK-NEXT: %[[BINDING_J:.*]]:2 = hlfir.declare %[[ITER_VAR_J]] {uniq_name = "_QFEj"}
+! COMMON-NEXT: %[[ITER_VAR_K:.*]] = fir.alloca i32 {bindc_name = "k"}
+! COMMON-NEXT: %[[BINDING_K:.*]]:2 = hlfir.declare %[[ITER_VAR_K]] {uniq_name = "_QFEk"}
-! CHECK-NEXT: %[[ITER_VAR_K:.*]] = fir.alloca i32 {bindc_name = "k"}
-! CHECK-NEXT: %[[BINDING_K:.*]]:2 = hlfir.declare %[[ITER_VAR_K]] {uniq_name = "_QFEk"}
+! DEVICE: omp.distribute
-! CHECK: omp.wsloop {
-! CHECK-NEXT: omp.loop_nest
-! CHECK-SAME: (%[[ARG0:[^[:space:]]+]], %[[ARG1:[^[:space:]]+]], %[[ARG2:[^[:space:]]+]])
-! CHECK-SAME: : index = (%[[LB_I]], %[[LB_J]], %[[LB_K]])
-! CHECK-SAME: to (%[[UB_I]], %[[UB_J]], %[[UB_K]]) inclusive
-! CHECK-SAME: step (%[[STEP_I]], %[[STEP_J]], %[[STEP_K]]) {
+! COMMON: omp.wsloop {
+! COMMON-NEXT: omp.loop_nest
+! COMMON-SAME: (%[[ARG0:[^[:space:]]+]], %[[ARG1:[^[:space:]]+]], %[[ARG2:[^[:space:]]+]])
+! COMMON-SAME: : index = (%[[LB_I]], %[[LB_J]], %[[LB_K]])
+! COMMON-SAME: to (%[[UB_I]], %[[UB_J]], %[[UB_K]]) inclusive
+! COMMON-SAME: step (%[[STEP_I]], %[[STEP_J]], %[[STEP_K]]) {
-! CHECK-NEXT: %[[IV_IDX_I:.*]] = fir.convert %[[ARG0]]
-! CHECK-NEXT: fir.store %[[IV_IDX_I]] to %[[BINDING_I]]#0
+! COMMON-NEXT: %[[IV_IDX_I:.*]] = fir.convert %[[ARG0]]
+! COMMON-NEXT: fir.store %[[IV_IDX_I]] to %[[BINDING_I]]#0
-! CHECK-NEXT: %[[IV_IDX_J:.*]] = fir.convert %[[ARG1]]
-! CHECK-NEXT: fir.store %[[IV_IDX_J]] to %[[BINDING_J]]#0
+! COMMON-NEXT: %[[IV_IDX_J:.*]] = fir.convert %[[ARG1]]
+! COMMON-NEXT: fir.store %[[IV_IDX_J]] to %[[BINDING_J]]#0
-! CHECK-NEXT: %[[IV_IDX_K:.*]] = fir.convert %[[ARG2]]
-! CHECK-NEXT: fir.store %[[IV_IDX_K]] to %[[BINDING_K]]#0
+! COMMON-NEXT: %[[IV_IDX_K:.*]] = fir.convert %[[ARG2]]
+! COMMON-NEXT: fir.store %[[IV_IDX_K]] to %[[BINDING_K]]#0
-! CHECK: omp.yield
-! CHECK-NEXT: }
-! CHECK-NEXT: }
+! COMMON: omp.yield
+! COMMON-NEXT: }
+! COMMON-NEXT: }
-! CHECK-NEXT: omp.terminator
-! CHECK-NEXT: }
+! HOST-NEXT: omp.terminator
+! HOST-NEXT: }
diff --git a/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90 b/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90
new file mode 100644
index 0000000000000..b6b2136e2d405
--- /dev/null
+++ b/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90
@@ -0,0 +1,34 @@
+! Tests that we can map "unnamed" and non-reference/non-box values to device; for
+! example, values that result from `fir.box_dims` ops.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s
+
+subroutine test_non_refernece
+ integer i
+ real, allocatable :: arr(:)
+
+ associate(a => arr)
+ do concurrent (i = 1:10)
+ block
+ real z(size(a,1))
+ end block
+ end do
+ end associate
+end subroutine test_non_refernece
+
+! CHECK: omp.map.info var_ptr(%{{.*}} : !fir.ref<index>, index)
+! CHECK: omp.map.info var_ptr(%{{.*}} : !fir.ref<index>, index)
+! CHECK: omp.map.info var_ptr(%{{.*}} : !fir.ref<index>, index)
+
+! CHECK: %[[DIM_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<index>, index)
+! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc)
+! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = ""}
+
+
+! CHECK: omp.target host_eval({{.*}} : index, index, index)
+! CHECK-SAME: map_entries({{.*}}, %[[DIM_MAP]] -> %{{.*}} :
+! CHECK-SAME: !fir.ref<i32>, !fir.ref<index>)
+
diff --git a/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 b/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90
index 74799359e0476..c87cf392bd5d6 100644
--- a/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90
+++ b/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90
@@ -1,8 +1,14 @@
+! Fails until we update the pass to use the `fir.do_concurrent` op.
+
! Tests that if `do concurrent` is not perfectly nested in its parent loop, that
! we skip converting the not-perfectly nested `do concurrent` loop.
+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \
-! RUN: | FileCheck %s
+! RUN: | FileCheck %s --check-prefixes=COMMON
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON
program main
integer, parameter :: n = 10
@@ -19,28 +25,46 @@ program main
end do
end
-! CHECK: omp.parallel {
-! CHECK: omp.wsloop {
-! CHECK: omp.loop_nest ({{[^[:space:]]+}}) {{.*}} {
-! CHECK: fir.do_concurrent {
-! CHECK: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j"}
-! CHECK: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]]
+! DEVICE: omp.target {{.*}}map_entries(
+! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_ARG:[^,]+]],
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[X_ARG:[^,]+]],
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[A_ARG:[^,]+]],
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^:]+}} :
+! DEVICE-SAME: {{.*}}) {
+
+! DEVICE: omp.teams
+
+! COMMON: omp.parallel {
+
+! DEVICE: omp.distribute
+
+! COMMON: omp.wsloop {
+! COMMON: omp.loop_nest ({{[^[:space:]]+}}) {{.*}} {
+! COMMON: fir.do_concurrent {
+
+! COMMON: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j"}
+! COMMON: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]]
-! CHECK: %[[ORIG_K_ALLOC:.*]] = fir.alloca i32 {bindc_name = "k"}
-! CHECK: %[[ORIG_K_DECL:.*]]:2 = hlfir.declare %[[ORIG_K_ALLOC]]
+! COMMON: %[[ORIG_K_ALLOC:.*]] = fir.alloca i32 {bindc_name = "k"}
+! COMMON: %[[ORIG_K_DECL:.*]]:2 = hlfir.declare %[[ORIG_K_ALLOC]]
-! CHECK: fir.do_concurrent.loop (%[[J_IV:.*]], %[[K_IV:.*]]) = {{.*}} {
-! CHECK: %[[J_IV_CONV:.*]] = fir.convert %[[J_IV]] : (index) -> i32
-! CHECK: fir.store %[[J_IV_CONV]] to %[[ORIG_J_DECL]]#0
+! COMMON: fir.do_concurrent.loop (%[[J_IV:.*]], %[[K_IV:.*]]) = {{.*}} {
+! COMMON: %[[J_IV_CONV:.*]] = fir.convert %[[J_IV]] : (index) -> i32
+! COMMON: fir.store %[[J_IV_CONV]] to %[[ORIG_J_DECL]]#0
-! CHECK: %[[K_IV_CONV:.*]] = fir.convert %[[K_IV]] : (index) -> i32
-! CHECK: fir.store %[[K_IV_CONV]] to %[[ORIG_K_DECL]]#0
-! CHECK: }
-! CHECK: }
-! CHECK: omp.yield
-! CHECK: }
-! CHECK: }
-! CHECK: omp.terminator
-! CHECK: }
+! COMMON: %[[K_IV_CONV:.*]] = fir.convert %[[K_IV]] : (index) -> i32
+! COMMON: fir.store %[[K_IV_CONV]] to %[[ORIG_K_DECL]]#0
+! COMMON: }
+! COMMON: }
+! COMMON: omp.yield
+! COMMON: }
+! COMMON: }
+! COMMON: omp.terminator
+! COMMON: }
diff --git a/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90
new file mode 100644
index 0000000000000..e38474a68747f
--- /dev/null
+++ b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90
@@ -0,0 +1,42 @@
+! Tests `do concurrent` mapping when mapped value(s) depend on values defined
+! outside the target region; e.g. the size of the array is dynamic. This needs
+! to be handled by localizing these region outsiders: either by cloning them in
+! the region or, if we cannot do that, by mapping them and using the mapped values.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s
+
+subroutine foo(n)
+ implicit none
+ integer :: n
+ integer :: i
+ integer, dimension(n) :: a
+
+ do concurrent(i=1:10)
+ a(i) = i
+ end do
+end subroutine
+
+! CHECK-DAG: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFfooEi"}
+! CHECK-DAG: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFfooEa"}
+
+! CHECK-DAG: %[[I_MAP:.*]] = omp.map.info var_ptr(%[[I_DECL]]#1 : {{.*}}) {{.*}} {name = "_QFfooEi"}
+! CHECK-DAG: %[[A_MAP:.*]] = omp.map.info var_ptr(%[[A_DECL]]#1 : {{.*}}) {{.*}} {name = "_QFfooEa"}
+! CHECK-DAG: %[[N_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}) {{.*}} {name = "_QFfooEa.extent.dim0"}
+
+! CHECK: omp.target
+! CHECK-SAME: map_entries(
+! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+! CHECK-SAME: %[[I_MAP]] -> %[[I_ARG:arg[0-9]*]],
+! CHECK-SAME: %[[A_MAP]] -> %[[A_ARG:arg[0-9]*]],
+! CHECK-SAME: %[[N_MAP]] -> %[[N_ARG:arg[0-9]*]] : {{.*}})
+! CHECK-SAME: {{.*}} {
+
+! CHECK-DAG: %{{.*}} = hlfir.declare %[[I_ARG]]
+! CHECK-DAG: %{{.*}} = hlfir.declare %[[A_ARG]]
+! CHECK-DAG: %{{.*}} = fir.load %[[N_ARG]]
+
+! CHECK: omp.terminator
+! CHECK: }
diff --git a/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 b/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90
new file mode 100644
index 0000000000000..2dada05396ad6
--- /dev/null
+++ b/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90
@@ -0,0 +1,68 @@
+! Tests that if a `do concurrent` loop is indirectly nested in its parent loop, we
+! skip converting the indirectly nested `do concurrent` loop.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \
+! RUN: | FileCheck %s --check-prefixes=HOST,COMMON
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \
+! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON
+
+program main
+ integer, parameter :: n = 10
+ integer, parameter :: m = 20
+ integer, parameter :: l = 30
+ integer x;
+ integer :: a(n, m, l)
+
+ do concurrent(i=1:n)
+ do j=1,m
+ do concurrent(k=1:l)
+ a(i,j,k) = i * j + k
+ end do
+ end do
+ end do
+end
+
+! HOST: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j", {{.*}}}
+! HOST: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]]
+
+! DEVICE: omp.target {{.*}}map_entries(
+! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_ARG:[^,]+]],
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ARG:[^,]+]],
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[A_ARG:[^,]+]],
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
+! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^:]+}} :
+! DEVICE-SAME: {{.*}}) {
+
+! DEVICE: %[[TARGET_J_DECL:.*]]:2 = hlfir.declare %[[J_ARG]] {uniq_name = "_QFEj"}
+
+! DEVICE: omp.teams
+
+! COMMON: omp.parallel {
+
+! DEVICE: omp.distribute
+
+! COMMON: omp.wsloop {
+! COMMON: omp.loop_nest ({{[^[:space:]]+}}) {{.*}} {
+! COMMON: fir.do_loop {{.*}} iter_args(%[[J_IV:.*]] = {{.*}}) -> {{.*}} {
+! HOST: fir.store %[[J_IV]] to %[[ORIG_J_DECL]]#0
+! DEVICE: fir.store %[[J_IV]] to %[[TARGET_J_DECL]]#0
+
+! COMMON: fir.do_concurrent {
+! COMMON: %[[ORIG_K_ALLOC:.*]] = fir.alloca i32 {bindc_name = "k"}
+! COMMON: %[[ORIG_K_DECL:.*]]:2 = hlfir.declare %[[ORIG_K_ALLOC]]
+! COMMON: fir.do_concurrent.loop (%[[K_IV:.*]]) = {{.*}} {
+! COMMON: %[[K_IV_CONV:.*]] = fir.convert %[[K_IV]] : (index) -> i32
+! COMMON: fir.store %[[K_IV_CONV]] to %[[ORIG_K_DECL]]#0
+! COMMON: }
+! COMMON: }
+! COMMON: }
+! COMMON: omp.yield
+! COMMON: }
+! COMMON: }
+! COMMON: omp.terminator
+! COMMON: }
More information about the llvm-branch-commits
mailing list