[flang-commits] [flang] [flang][OpenMP] Implement copyin for pointers and allocatables. (PR #107425)
David Truby via flang-commits
flang-commits at lists.llvm.org
Mon Sep 9 06:49:30 PDT 2024
https://github.com/DavidTruby updated https://github.com/llvm/llvm-project/pull/107425
>From ee837f61369fdfd1322df53fd47b7cd4da3946f0 Mon Sep 17 00:00:00 2001
From: David Truby <david.truby at arm.com>
Date: Thu, 5 Sep 2024 14:22:58 +0100
Subject: [PATCH 1/2] [flang][OpenMP] Implement copyin for pointers and
allocatables.
The copyin clause currently forbids pointer and allocatable variables,
which are allowed by the OpenMP 1.1 and 3.0 specifications respectively.
Since #106559 it is sufficient to remove the TODO check to get correct
behaviour.
---
flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 4 +-
flang/test/Lower/OpenMP/copyin.f90 | 59 ++++++++++++++++++++++
2 files changed, 60 insertions(+), 3 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 6dee31ddb6963d..3f54234b176e3f 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -619,9 +619,7 @@ bool ClauseProcessor::processCopyin() const {
checkAndCopyHostAssociateVar(&*mem, &insPt);
break;
}
- if (semantics::IsAllocatableOrObjectPointer(&sym->GetUltimate()))
- TODO(converter.getCurrentLocation(),
- "pointer or allocatable variables in Copyin clause");
+
assert(sym->has<semantics::HostAssocDetails>() &&
"No host-association found");
checkAndCopyHostAssociateVar(sym);
diff --git a/flang/test/Lower/OpenMP/copyin.f90 b/flang/test/Lower/OpenMP/copyin.f90
index 4023987a841b82..c4c69657f45928 100644
--- a/flang/test/Lower/OpenMP/copyin.f90
+++ b/flang/test/Lower/OpenMP/copyin.f90
@@ -356,3 +356,62 @@ subroutine common_2()
end do
!$omp end parallel do
end subroutine
+
+! CHECK-LABEL: func.func @_QPpointer() {
+! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFpointerEp) : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointerEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>)
+! CHECK: %[[VAL_2:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointerEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>)
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> {pinned}
+! CHECK: %[[VAL_5:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointerEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>)
+! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: fir.store %[[VAL_7]] to %[[VAL_6]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: omp.barrier
+! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.copy_in %[[VAL_8]] to %[[VAL_4]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.box<!fir.ptr<!fir.array<?xi32>>>, i1)
+! CHECK: %[[VAL_10:.*]] = fir.box_addr %[[VAL_9]]#0 : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.ptr<!fir.array<?xi32>>
+! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (!fir.ptr<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+! CHECK: fir.call @_QPsub7(%[[VAL_11]]) fastmath<contract> : (!fir.ref<!fir.array<?xi32>>) -> ()
+! CHECK: hlfir.copy_out %[[VAL_4]], %[[VAL_9]]#1 to %[[VAL_8]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i1, !fir.box<!fir.ptr<!fir.array<?xi32>>>) -> ()
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+subroutine pointer()
+ integer, pointer, save :: p(:)
+ !$omp threadprivate(p)
+
+ !$omp parallel copyin(p)
+ call sub7(p)
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPallocatable() {
+! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFallocatableEp) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatableEp"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[VAL_2:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatableEp"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_4:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatableEp"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_5]]#0 realloc : !fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: omp.barrier
+! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_8:.*]] = fir.box_addr %[[VAL_7]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+! CHECK: fir.call @_QPsub8(%[[VAL_9]]) fastmath<contract> : (!fir.ref<!fir.array<?xi32>>) -> ()
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+subroutine allocatable()
+ integer, allocatable, save :: p(:)
+ !$omp threadprivate(p)
+
+ !$omp parallel copyin(p)
+ call sub8(p)
+ !$omp end parallel
+end subroutine
>From 9bd09f531fdd3ec26dc437b84ef0eafbb7e177e4 Mon Sep 17 00:00:00 2001
From: David Truby <david.truby at arm.com>
Date: Mon, 9 Sep 2024 14:47:31 +0100
Subject: [PATCH 2/2] update copyVarHLFIR with correct behaviour for copyin
---
flang/lib/Lower/Bridge.cpp | 7 +++---
flang/test/Lower/OpenMP/copyin.f90 | 35 ++++++++++++++++++++++++++++++
2 files changed, 39 insertions(+), 3 deletions(-)
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 7cdecb788425a0..0c0f81adb00eaf 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1261,9 +1261,10 @@ class FirConverter : public Fortran::lower::AbstractConverter {
auto loadVal = builder->create<fir::LoadOp>(loc, rhs);
builder->create<fir::StoreOp>(loc, loadVal, lhs);
} else if (isAllocatable &&
- flags.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) {
- // For firstprivate allocatable variables, RHS must be copied only when
- // LHS is allocated.
+ (flags.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate) ||
+ flags.test(Fortran::semantics::Symbol::Flag::OmpCopyIn))) {
+ // For firstprivate and copyin allocatable variables, RHS must be copied
+ // only when LHS is allocated.
hlfir::Entity temp =
hlfir::derefPointersAndAllocatables(loc, *builder, lhs);
mlir::Value addr = hlfir::genVariableRawAddress(loc, *builder, temp);
diff --git a/flang/test/Lower/OpenMP/copyin.f90 b/flang/test/Lower/OpenMP/copyin.f90
index c4c69657f45928..b1c6b9420f4c46 100644
--- a/flang/test/Lower/OpenMP/copyin.f90
+++ b/flang/test/Lower/OpenMP/copyin.f90
@@ -415,3 +415,38 @@ subroutine allocatable()
call sub8(p)
!$omp end parallel
end subroutine
+
+! CHECK-LABEL: func.func @_QPallocatable2() {
+! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFallocatable2Ea) : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatable2Ea"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
+! CHECK: %[[VAL_2:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>> -> !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatable2Ea"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_4:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>> -> !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatable2Ea"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
+! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK: %[[VAL_7:.*]] = fir.box_addr %[[VAL_6]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (!fir.heap<i32>) -> i64
+! CHECK: %[[VAL_9:.*]] = arith.constant 0 : i64
+! CHECK: %[[VAL_10:.*]] = arith.cmpi ne, %[[VAL_8]], %[[VAL_9]] : i64
+! CHECK: fir.if %[[VAL_10]] {
+! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK: %[[VAL_12:.*]] = fir.box_addr %[[VAL_11]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]] : !fir.heap<i32>
+! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_5]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK: }
+! CHECK: omp.barrier
+! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_5]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
+subroutine allocatable2()
+ integer, allocatable, save :: a
+ !$omp threadprivate(a)
+
+ !$omp parallel copyin(a)
+ a = 1
+ !$omp end parallel
+end subroutine
More information about the flang-commits
mailing list