[flang-commits] [flang] [flang][OpenMP][RFC] Add support for COPYPRIVATE (PR #73128)
Leandro Lupori via flang-commits
flang-commits at lists.llvm.org
Thu Nov 23 10:36:43 PST 2023
================
@@ -0,0 +1,244 @@
+! Test COPYPRIVATE.
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+!CHECK-LABEL: func @_QPtest_tp
+!CHECK: %[[SYNC_VAR_ADDR:.*]] = fir.alloca f32 {bindc_name = "a", pinned, uniq_name = "_QFtest_tpEa"}
+!CHECK: %[[SYNC_VAR:.*]]:2 = hlfir.declare %[[SYNC_VAR_ADDR]] {uniq_name = "_QFtest_tpEa"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK: omp.single {
+!CHECK: hlfir.assign %{{.*}} to %[[SYNC_VAR]]#0 temporary_lhs : f32, !fir.ref<f32>
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
+!CHECK-NEXT: %[[TMP:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<f32>
+!CHECK-NEXT: omp.barrier
+!CHECK-NEXT: hlfir.assign %[[TMP]] to %{{.*}}#1 temporary_lhs : f32, !fir.ref<f32>
+!CHECK-NEXT: omp.barrier
+subroutine test_tp()
+ real(4), save :: a = 2.5
+ !$omp threadprivate(a)
+
+ !$omp single
+ a = 1.5
+ !$omp end single copyprivate(a)
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_priv
+!CHECK: %[[ORIG_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_privEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[SYNC_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_privEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.parallel {
+!CHECK: omp.single {
+!CHECK: hlfir.assign %{{.*}} to %[[SYNC_VAR]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
+!CHECK-NEXT: %[[TMP:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<i32>
+!CHECK-NEXT: omp.barrier
+!CHECK-NEXT: hlfir.assign %[[TMP]] to %{{.*}}#1 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT: omp.barrier
+!CHECK: }
+subroutine test_priv()
+ integer :: i
+
+ i = 11
+ !$omp parallel firstprivate(i)
+ !$omp single
+ i = i + 1
+ !$omp end single copyprivate(i)
+ !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_array
+!CHECK: %[[ORIG_VAR:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+!CHECK: %[[SYNC_VAR:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+!CHECK: omp.parallel {
+!CHECK: omp.single {
+!CHECK: hlfir.assign %{{.*}}#1 to %[[SYNC_VAR]]#0 temporary_lhs : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
+!CHECK-NEXT: hlfir.assign %[[SYNC_VAR]]#0 to %{{.*}}#1 temporary_lhs : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
+!CHECK-NEXT: omp.barrier
+!CHECK: }
+subroutine test_array()
+ integer :: a(10), i
+
+ a = -1
+ !$omp parallel firstprivate(a)
+ !$omp single
+ do i = 1, 5
+ a(i) = i * 10
+ end do
+ !$omp end single copyprivate(a)
+ !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_type
+!CHECK: %[[ORIG_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_typeEt"} : (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>) -> (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>)
+!CHECK: %[[SYNC_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_typeEt"} : (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>) -> (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>)
+!CHECK: omp.parallel {
+!CHECK: omp.single {
+!CHECK: hlfir.assign %{{.*}}#1 to %[[SYNC_VAR]]#0 temporary_lhs : !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
+!CHECK-NEXT: hlfir.assign %[[SYNC_VAR]]#0 to %{{.*}}#1 temporary_lhs : !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>
+!CHECK-NEXT: omp.barrier
+!CHECK: }
+subroutine test_type()
+ type ty
+ integer :: i
+ real :: r
+ integer, dimension(10) :: a
+ end type
+
+ integer :: i
+ type(ty) :: t
+
+ t%i = -1
+ t%r = -1.5
+ t%a = -1
+ !$omp parallel firstprivate(t)
+ !$omp single
+ t%i = 42
+ t%r = 3.14
+ do i = 1, 5
+ t%a(i) = i * 10
+ end do
+ !$omp end single copyprivate(t)
+ !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_multi
+!CHECK: %[[I_SYNC_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFtest_multiEi"}
+!CHECK: %[[I_SYNC:.*]]:2 = hlfir.declare %[[I_SYNC_ADDR]] {uniq_name = "_QFtest_multiEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[J_SYNC_ADDR:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFtest_multiEj"}
+!CHECK: %[[J_SYNC:.*]]:2 = hlfir.declare %[[J_SYNC_ADDR]] {uniq_name = "_QFtest_multiEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[K_SYNC_ADDR:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFtest_multiEk"}
+!CHECK: %[[K_SYNC:.*]]:2 = hlfir.declare %[[K_SYNC_ADDR]] {uniq_name = "_QFtest_multiEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.parallel {
+!CHECK: omp.single {
+!CHECK: %[[I:.*]] = fir.load %{{.*}}#1 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[I]] to %[[I_SYNC]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[J:.*]] = fir.load %{{.*}}#1 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[J]] to %[[J_SYNC]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[K:.*]] = fir.load %{{.*}}#1 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[K]] to %[[K_SYNC]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
+!CHECK-NEXT: %[[K:.*]] = fir.load %[[K_SYNC]]#0 : !fir.ref<i32>
+!CHECK-NEXT: omp.barrier
+!CHECK-NEXT: hlfir.assign %[[K]] to %{{.*}}#1 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT: omp.barrier
----------------
luporl wrote:
Yes. I'll try to group the loads and assigns, to reduce the number of barriers to two per copyprivate clause.
Actually, the `omp.barrier` could probably be replaced by a regular memory barrier here, but I couldn't find a `fir` operation for it.
In fact, I'm a bit surprised these barriers are required at all. I expected the end of `single` to synchronize memory among all threads, so that no further barriers would be needed for the loads and stores in each thread.
But in practice, omitting the barrier after an allocatable array load causes its old extent to be used in `hlfir.assign`, resulting in a runtime error, and omitting the barrier after the assign causes old values to be used in a subsequent print statement, for instance.
I have only noticed this issue with allocatable arrays so far.
https://github.com/llvm/llvm-project/pull/73128
More information about the flang-commits
mailing list