[flang-commits] [flang] [flang][OpenMP][RFC] Add support for COPYPRIVATE (PR #73128)

Leandro Lupori via flang-commits flang-commits at lists.llvm.org
Fri Nov 24 04:36:11 PST 2023


================
@@ -0,0 +1,244 @@
+! Test COPYPRIVATE.
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+!CHECK-LABEL: func @_QPtest_tp
+!CHECK:       %[[SYNC_VAR_ADDR:.*]] = fir.alloca f32 {bindc_name = "a", pinned, uniq_name = "_QFtest_tpEa"}
+!CHECK:       %[[SYNC_VAR:.*]]:2 = hlfir.declare %[[SYNC_VAR_ADDR]] {uniq_name = "_QFtest_tpEa"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:       omp.single {
+!CHECK:         hlfir.assign %{{.*}} to %[[SYNC_VAR]]#0 temporary_lhs : f32, !fir.ref<f32>
+!CHECK-NEXT:    omp.terminator
+!CHECK-NEXT:  }
+!CHECK-NEXT:  %[[TMP:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<f32>
+!CHECK-NEXT:  omp.barrier
+!CHECK-NEXT:  hlfir.assign %[[TMP]] to %{{.*}}#1 temporary_lhs : f32, !fir.ref<f32>
+!CHECK-NEXT:  omp.barrier
+subroutine test_tp()
+  real(4), save :: a = 2.5
+  !$omp threadprivate(a)
+
+  !$omp single
+  a = 1.5
+  !$omp end single copyprivate(a)
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_priv
+!CHECK:       %[[ORIG_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_privEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:       %[[SYNC_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_privEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:       omp.parallel {
+!CHECK:         omp.single {
+!CHECK:           hlfir.assign %{{.*}} to %[[SYNC_VAR]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT:      omp.terminator
+!CHECK-NEXT:    }
+!CHECK-NEXT:    %[[TMP:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<i32>
+!CHECK-NEXT:    omp.barrier
+!CHECK-NEXT:    hlfir.assign %[[TMP]] to %{{.*}}#1 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT:    omp.barrier
+!CHECK:       }
+subroutine test_priv()
+  integer :: i
+
+  i = 11
+  !$omp parallel firstprivate(i)
+  !$omp single
+  i = i + 1
+  !$omp end single copyprivate(i)
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_array
+!CHECK:       %[[ORIG_VAR:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+!CHECK:       %[[SYNC_VAR:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+!CHECK:       omp.parallel {
+!CHECK:         omp.single {
+!CHECK:           hlfir.assign %{{.*}}#1 to %[[SYNC_VAR]]#0 temporary_lhs : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
+!CHECK-NEXT:      omp.terminator
+!CHECK-NEXT:    }
+!CHECK-NEXT:    hlfir.assign %[[SYNC_VAR]]#0 to %{{.*}}#1 temporary_lhs : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
+!CHECK-NEXT:    omp.barrier
+!CHECK:       }
+subroutine test_array()
+  integer :: a(10), i
+
+  a = -1
+  !$omp parallel firstprivate(a)
+  !$omp single
+  do i = 1, 5
+    a(i) = i * 10
+  end do
+  !$omp end single copyprivate(a)
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_type
+!CHECK:       %[[ORIG_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_typeEt"} : (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>) -> (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>)
+!CHECK:       %[[SYNC_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_typeEt"} : (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>) -> (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>)
+!CHECK:       omp.parallel {
+!CHECK:         omp.single {
+!CHECK:           hlfir.assign %{{.*}}#1 to %[[SYNC_VAR]]#0 temporary_lhs : !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>
+!CHECK-NEXT:      omp.terminator
+!CHECK-NEXT:    }
+!CHECK-NEXT:    hlfir.assign %[[SYNC_VAR]]#0 to %{{.*}}#1 temporary_lhs : !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>
+!CHECK-NEXT:    omp.barrier
+!CHECK:       }
+subroutine test_type()
+  type ty
+    integer :: i
+    real :: r
+    integer, dimension(10) :: a
+  end type
+
+  integer :: i
+  type(ty) :: t
+
+  t%i = -1
+  t%r = -1.5
+  t%a = -1
+  !$omp parallel firstprivate(t)
+  !$omp single
+  t%i = 42
+  t%r = 3.14
+  do i = 1, 5
+    t%a(i) = i * 10
+  end do
+  !$omp end single copyprivate(t)
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_multi
+!CHECK:       %[[I_SYNC_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFtest_multiEi"}
+!CHECK:       %[[I_SYNC:.*]]:2 = hlfir.declare %[[I_SYNC_ADDR]] {uniq_name = "_QFtest_multiEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:       %[[J_SYNC_ADDR:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFtest_multiEj"}
+!CHECK:       %[[J_SYNC:.*]]:2 = hlfir.declare %[[J_SYNC_ADDR]] {uniq_name = "_QFtest_multiEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:       %[[K_SYNC_ADDR:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFtest_multiEk"}
+!CHECK:       %[[K_SYNC:.*]]:2 = hlfir.declare %[[K_SYNC_ADDR]] {uniq_name = "_QFtest_multiEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:       omp.parallel {
+!CHECK:         omp.single {
+!CHECK:           %[[I:.*]] = fir.load %{{.*}}#1 : !fir.ref<i32>
+!CHECK:           hlfir.assign %[[I]] to %[[I_SYNC]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK:           %[[J:.*]] = fir.load %{{.*}}#1 : !fir.ref<i32>
+!CHECK:           hlfir.assign %[[J]] to %[[J_SYNC]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK:           %[[K:.*]] = fir.load %{{.*}}#1 : !fir.ref<i32>
+!CHECK:           hlfir.assign %[[K]] to %[[K_SYNC]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT:      omp.terminator
+!CHECK-NEXT:    }
+!CHECK-NEXT:    %[[K:.*]] = fir.load %[[K_SYNC]]#0 : !fir.ref<i32>
+!CHECK-NEXT:    omp.barrier
+!CHECK-NEXT:    hlfir.assign %[[K]] to %{{.*}}#1 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT:    omp.barrier
----------------
luporl wrote:

@NimishMishra Thanks for the review! That's right, this is the part that inserts 2 barriers.
The problem is that, if I reduce it to one barrier, I get incorrect results when using copyprivate with allocatable arrays (tested on AArch64). It should be possible to refactor the code to generate all loads of the copyprivate list, then a barrier, then all assigns and then another barrier.

But, to be honest, I still don't understand why it only works with 2 barriers. I will try to investigate this further, to see if there is a better solution.

https://github.com/llvm/llvm-project/pull/73128


More information about the flang-commits mailing list