[flang-commits] [flang] [flang][MLIR][OpenMP] Extend delayed privatization for arrays (PR #85023)
Kareem Ergawy via flang-commits
flang-commits at lists.llvm.org
Wed Mar 13 01:48:42 PDT 2024
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/85023
>From 5f37252e1f70e723a3244854b22bbc98f58648f4 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 11 Mar 2024 04:39:51 -0500
Subject: [PATCH 1/4] [flang][MLIR][OpenMP] Extend delayed privatization for
scalar allocatables
One more step in extending support for delayed privatization. This diff
adds support for scalar allocatables.
---
.../flang/Optimizer/Builder/BoxValue.h | 11 +++++
flang/lib/Lower/Bridge.cpp | 24 ++++++-----
.../lib/Lower/OpenMP/DataSharingProcessor.cpp | 8 ++--
...privatization-allocatable-firstprivate.f90 | 33 +++++++++++++++
...ayed-privatization-allocatable-private.f90 | 40 +++++++++++++++++++
5 files changed, 103 insertions(+), 13 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
diff --git a/flang/include/flang/Optimizer/Builder/BoxValue.h b/flang/include/flang/Optimizer/Builder/BoxValue.h
index 2fed2d48a7a080..9b84bfb5ab0bf7 100644
--- a/flang/include/flang/Optimizer/Builder/BoxValue.h
+++ b/flang/include/flang/Optimizer/Builder/BoxValue.h
@@ -535,6 +535,17 @@ class ExtendedValue : public details::matcher<ExtendedValue> {
const VT &matchee() const { return box; }
+ /// Clone an ExtendedValue to a new instance changing the base address.
+ ///
+ /// TODO So far, this is only a shallow clone; only the base address is
+ /// replaced. This will probably be extended to implement deep cloning to
+ /// support scenarios such as delayed privatization for ArrayBoxValue's.
+ ExtendedValue clone(mlir::Value newBase) const {
+ return match(
+ [&](const UnboxedValue &box) -> ExtendedValue { return newBase; },
+ [&](const auto &box) -> ExtendedValue { return box.clone(newBase); });
+ }
+
private:
VT box;
};
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a668ba4116faab..5fa914d087f020 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -748,7 +748,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
void copyVar(mlir::Location loc, mlir::Value dst,
mlir::Value src) override final {
- copyVarHLFIR(loc, dst, src);
+ copyVarHLFIR(loc, Fortran::lower::SymbolBox::Intrinsic{dst},
+ Fortran::lower::SymbolBox::Intrinsic{src});
}
void copyHostAssociateVar(
@@ -1009,10 +1010,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
// `omp.private`'s `alloc` block. If this is the case, we return this
// `SymbolBox::Intrinsic` value.
if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym))
- return v.match(
- [&](const Fortran::lower::SymbolBox::Intrinsic &)
- -> Fortran::lower::SymbolBox { return v; },
- [](const auto &) -> Fortran::lower::SymbolBox { return {}; });
+ return v;
return {};
}
@@ -1060,15 +1058,16 @@ class FirConverter : public Fortran::lower::AbstractConverter {
const Fortran::lower::SymbolBox &rhs_sb) {
mlir::Location loc = genLocation(sym.name());
if (lowerToHighLevelFIR())
- copyVarHLFIR(loc, lhs_sb.getAddr(), rhs_sb.getAddr());
+ copyVarHLFIR(loc, lhs_sb, rhs_sb);
else
copyVarFIR(loc, sym, lhs_sb, rhs_sb);
}
- void copyVarHLFIR(mlir::Location loc, mlir::Value dst, mlir::Value src) {
+ void copyVarHLFIR(mlir::Location loc, Fortran::lower::SymbolBox dst,
+ Fortran::lower::SymbolBox src) {
assert(lowerToHighLevelFIR());
- hlfir::Entity lhs{dst};
- hlfir::Entity rhs{src};
+ hlfir::Entity lhs{dst.getAddr()};
+ hlfir::Entity rhs{src.getAddr()};
// Temporary_lhs is set to true in hlfir.assign below to avoid user
// assignment to be used and finalization to be called on the LHS.
// This may or may not be correct but mimics the current behaviour
@@ -1082,7 +1081,12 @@ class FirConverter : public Fortran::lower::AbstractConverter {
/*keepLhsLengthInAllocatableAssignment=*/false,
/*temporary_lhs=*/true);
};
- if (lhs.isAllocatable()) {
+
+ bool isBoxAllocatable = dst.match(
+ [](const fir::MutableBoxValue &box) { return box.isAllocatable(); },
+ [](const auto &box) { return false; });
+
+ if (isBoxAllocatable) {
// Deep copy allocatable if it is allocated.
// Note that when allocated, the RHS is already allocated with the LHS
// shape for copy on entry in createHostAssociateVarClone.
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 717b8cc0276a30..bee533949c6564 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -50,6 +50,7 @@ void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
}
void DataSharingProcessor::insertDeallocs() {
+ // TODO Extend delayed privatization to include a `dealloc` region?
for (const Fortran::semantics::Symbol *sym : privatizedSymbols)
if (Fortran::semantics::IsAllocatable(sym->GetUltimate())) {
converter.createHostAssociateVarCloneDealloc(*sym);
@@ -376,6 +377,7 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
symLoc, uniquePrivatizerName, symType,
isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate
: mlir::omp::DataSharingClauseType::Private);
+ fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym);
symTable->pushScope();
@@ -386,7 +388,7 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
&allocRegion, /*insertPt=*/{}, symType, symLoc);
firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
- symTable->addSymbol(*sym, allocRegion.getArgument(0));
+ symTable->addSymbol(*sym, symExV.clone(allocRegion.getArgument(0)));
symTable->pushScope();
cloneSymbol(sym);
firOpBuilder.create<mlir::omp::YieldOp>(
@@ -403,10 +405,10 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
mlir::Block *copyEntryBlock = firOpBuilder.createBlock(
©Region, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc});
firOpBuilder.setInsertionPointToEnd(copyEntryBlock);
- symTable->addSymbol(*sym, copyRegion.getArgument(0),
+ symTable->addSymbol(*sym, symExV.clone(copyRegion.getArgument(0)),
/*force=*/true);
symTable->pushScope();
- symTable->addSymbol(*sym, copyRegion.getArgument(1));
+ symTable->addSymbol(*sym, symExV.clone(copyRegion.getArgument(1)));
auto ip = firOpBuilder.saveInsertionPoint();
copyFirstPrivateSymbol(sym, &ip);
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
new file mode 100644
index 00000000000000..545221b4c1abfe
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
@@ -0,0 +1,33 @@
+! Test delayed privatization for allocatables: `firstprivate`.
+
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_allocatable
+ implicit none
+ integer, allocatable :: var1
+
+!$omp parallel firstprivate(var1)
+ var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK: } copy {
+! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT: %[[PRIV_BASE_VAL:.*]] = fir.load %[[PRIV_PRIV_ARG]]
+! CHECK-NEXT: %[[PRIV_BASE_BOX:.*]] = fir.box_addr %[[PRIV_BASE_VAL]]
+! CHECK-NEXT: %[[PRIV_BASE_ADDR:.*]] = fir.convert %[[PRIV_BASE_BOX]]
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT: %[[COPY_COND:.*]] = arith.cmpi ne, %[[PRIV_BASE_ADDR]], %[[C0]] : i64
+
+! CHECK-NEXT: fir.if %[[COPY_COND]] {
+! CHECK-NEXT: %[[ORIG_BASE_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]]
+! CHECK-NEXT: %[[ORIG_BASE_ADDR:.*]] = fir.box_addr %[[ORIG_BASE_VAL]]
+! CHECK-NEXT: %[[ORIG_BASE_LD:.*]] = fir.load %[[ORIG_BASE_ADDR]]
+! CHECK-NEXT: hlfir.assign %[[ORIG_BASE_LD]] to %[[PRIV_BASE_BOX]] temporary_lhs
+! CHECK-NEXT: }
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
new file mode 100644
index 00000000000000..83fb516879ff20
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
@@ -0,0 +1,40 @@
+! Test delayed privatization for allocatables: `private`.
+
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_allocatable
+ implicit none
+ integer, allocatable :: var1
+
+!$omp parallel private(var1)
+ var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>> {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_allocatableEvar1"}
+
+! CHECK-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT: %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK-NEXT: %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]] : (!fir.heap<i32>) -> i64
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT: %[[ALLOC_COND:.*]] = arith.cmpi ne, %[[PRIV_ARG_ADDR]], %[[C0]] : i64
+
+! CHECK-NEXT: fir.if %[[ALLOC_COND]] {
+! CHECK-NEXT: %[[PRIV_ALLOCMEM:.*]] = fir.allocmem i32 {fir.must_be_heap = true, uniq_name = "_QFdelayed_privatization_allocatableEvar1.alloc"}
+! CHECK-NEXT: %[[PRIV_ALLOCMEM_BOX:.*]] = fir.embox %[[PRIV_ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK-NEXT: fir.store %[[PRIV_ALLOCMEM_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT: } else {
+! CHECK-NEXT: %[[ZERO_BITS:.*]] = fir.zero_bits !fir.heap<i32>
+! CHECK-NEXT: %[[ZERO_BOX:.*]] = fir.embox %[[ZERO_BITS]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK-NEXT: fir.store %[[ZERO_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT: }
+
+! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
+! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! CHECK-NEXT: }
>From 0888d7a4736c3eac4f4990d1b2317f61b4fc6e56 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Tue, 12 Mar 2024 03:02:33 -0500
Subject: [PATCH 2/4] Handle review comments.
---
flang/include/flang/Optimizer/Builder/BoxValue.h | 11 -----------
flang/lib/Lower/OpenMP/DataSharingProcessor.cpp | 11 +++++++----
...delayed-privatization-allocatable-firstprivate.f90 | 5 ++++-
.../delayed-privatization-allocatable-private.f90 | 5 ++++-
4 files changed, 15 insertions(+), 17 deletions(-)
diff --git a/flang/include/flang/Optimizer/Builder/BoxValue.h b/flang/include/flang/Optimizer/Builder/BoxValue.h
index 9b84bfb5ab0bf7..2fed2d48a7a080 100644
--- a/flang/include/flang/Optimizer/Builder/BoxValue.h
+++ b/flang/include/flang/Optimizer/Builder/BoxValue.h
@@ -535,17 +535,6 @@ class ExtendedValue : public details::matcher<ExtendedValue> {
const VT &matchee() const { return box; }
- /// Clone an ExtendedValue to a new instance changing the base address.
- ///
- /// TODO So far, this is only a shallow clone; only the base address is
- /// replaced. This will probably be extended to implement deep cloning to
- /// support scenarios such as delayed privatization for ArrayBoxValue's.
- ExtendedValue clone(mlir::Value newBase) const {
- return match(
- [&](const UnboxedValue &box) -> ExtendedValue { return newBase; },
- [&](const auto &box) -> ExtendedValue { return box.clone(newBase); });
- }
-
private:
VT box;
};
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index bee533949c6564..ec391ec8b06f7e 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -50,7 +50,7 @@ void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
}
void DataSharingProcessor::insertDeallocs() {
- // TODO Extend delayed privatization to include a `dealloc` region?
+ // TODO Extend delayed privatization to include a `dealloc` region.
for (const Fortran::semantics::Symbol *sym : privatizedSymbols)
if (Fortran::semantics::IsAllocatable(sym->GetUltimate())) {
converter.createHostAssociateVarCloneDealloc(*sym);
@@ -388,7 +388,8 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
&allocRegion, /*insertPt=*/{}, symType, symLoc);
firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
- symTable->addSymbol(*sym, symExV.clone(allocRegion.getArgument(0)));
+ symTable->addSymbol(*sym,
+ fir::substBase(symExV, allocRegion.getArgument(0)));
symTable->pushScope();
cloneSymbol(sym);
firOpBuilder.create<mlir::omp::YieldOp>(
@@ -405,10 +406,12 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
mlir::Block *copyEntryBlock = firOpBuilder.createBlock(
©Region, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc});
firOpBuilder.setInsertionPointToEnd(copyEntryBlock);
- symTable->addSymbol(*sym, symExV.clone(copyRegion.getArgument(0)),
+ symTable->addSymbol(*sym,
+ fir::substBase(symExV, copyRegion.getArgument(0)),
/*force=*/true);
symTable->pushScope();
- symTable->addSymbol(*sym, symExV.clone(copyRegion.getArgument(1)));
+ symTable->addSymbol(*sym,
+ fir::substBase(symExV, copyRegion.getArgument(1)));
auto ip = firOpBuilder.saveInsertionPoint();
copyFirstPrivateSymbol(sym, &ip);
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
index 545221b4c1abfe..aa835f82b2730b 100644
--- a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
@@ -1,6 +1,9 @@
! Test delayed privatization for allocatables: `firstprivate`.
-! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN: FileCheck %s
subroutine delayed_privatization_allocatable
implicit none
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
index 83fb516879ff20..cc1818b00b809c 100644
--- a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
@@ -1,6 +1,9 @@
! Test delayed privatization for allocatables: `private`.
-! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN: FileCheck %s
subroutine delayed_privatization_allocatable
implicit none
>From 306e090d531cc424d0d4495b4fa3b2b993c5a7c3 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Tue, 12 Mar 2024 04:21:01 -0500
Subject: [PATCH 3/4] Add support for scalar pointers.
---
flang/lib/Lower/Bridge.cpp | 12 ++++++-
.../OpenMP/delayed-privatization-pointer.f90 | 31 +++++++++++++++++++
2 files changed, 42 insertions(+), 1 deletion(-)
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-pointer.f90
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 5fa914d087f020..bc2e2adcb57c7b 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1084,6 +1084,16 @@ class FirConverter : public Fortran::lower::AbstractConverter {
bool isBoxAllocatable = dst.match(
[](const fir::MutableBoxValue &box) { return box.isAllocatable(); },
+ [](const fir::FortranVariableOpInterface &box) {
+ return fir::FortranVariableOpInterface(box).isAllocatable();
+ },
+ [](const auto &box) { return false; });
+
+ bool isBoxPointer = dst.match(
+ [](const fir::MutableBoxValue &box) { return box.isPointer(); },
+ [](const fir::FortranVariableOpInterface &box) {
+ return fir::FortranVariableOpInterface(box).isPointer();
+ },
[](const auto &box) { return false; });
if (isBoxAllocatable) {
@@ -1101,7 +1111,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
copyData(lhs, rhs);
})
.end();
- } else if (lhs.isPointer()) {
+ } else if (isBoxPointer) {
// Set LHS target to the target of RHS (do not copy the RHS
// target data into the LHS target storage).
auto loadVal = builder->create<fir::LoadOp>(loc, rhs);
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90 b/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90
new file mode 100644
index 00000000000000..796e4720c8c954
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90
@@ -0,0 +1,31 @@
+! Test delayed privatization for pointers: `private`.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN: FileCheck %s
+
+subroutine delayed_privatization_pointer
+ implicit none
+ integer, pointer :: var1
+
+!$omp parallel firstprivate(var1)
+ var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.ptr<i32>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_pointerEvar1"}
+! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
+! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! CHECK-NEXT: } copy {
+! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+! CHECK-NEXT: %[[ORIG_BASE_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]]
+ ! CHECK-NEXT: fir.store %[[ORIG_BASE_VAL]] to %[[PRIV_PRIV_ARG]] : !fir.ref<!fir.box<!fir.ptr<i32>>>
+! CHECK-NEXT: omp.yield(%[[PRIV_PRIV_ARG]] : [[TYPE]])
+! CHECK-NEXT: }
>From 523850ee096280a67e92e8f5f5ac3f0198bc95a3 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 13 Mar 2024 00:15:46 -0500
Subject: [PATCH 4/4] [flang][MLIR][OpenMP] Extend delayed privatization for
arrays
One more step in delayed privatization, this PR extends support for
arrays. In the delayed privatizer, a `fir.box_dims` operation is emitted
to retrieve the array bounds and extents. The result of these
`fir.box_dims` and the privatizer argument, are then used to create a
new `ExtendedValue`.
---
flang/lib/Lower/Bridge.cpp | 2 +
.../lib/Lower/OpenMP/DataSharingProcessor.cpp | 27 ++++++-
...elayed-privatization-allocatable-array.f90 | 67 +++++++++++++++++
.../OpenMP/delayed-privatization-array.f90 | 75 +++++++++++++++++++
4 files changed, 169 insertions(+), 2 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-allocatable-array.f90
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-array.f90
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index bc2e2adcb57c7b..7c2c88246322a1 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -620,6 +620,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
mlir::Type hSymType = genType(hsym);
Fortran::lower::SymbolBox hsb =
lookupSymbol(hsym, /*symMap=*/nullptr, /*forceHlfirBase=*/true);
+ llvm::errs() << ">>>> hsb: " << hsb << "\n";
auto allocate = [&](llvm::ArrayRef<mlir::Value> shape,
llvm::ArrayRef<mlir::Value> typeParams) -> mlir::Value {
@@ -635,6 +636,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
};
fir::ExtendedValue hexv = symBoxToExtendedValue(hsb);
+ llvm::errs() << ">>>> hexv: " << hexv << "\n";
fir::ExtendedValue exv = hexv.match(
[&](const fir::BoxValue &box) -> fir::ExtendedValue {
const Fortran::semantics::DeclTypeSpec *type = sym.GetType();
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index ec391ec8b06f7e..2412c13e938075 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -388,8 +388,31 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
&allocRegion, /*insertPt=*/{}, symType, symLoc);
firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
- symTable->addSymbol(*sym,
- fir::substBase(symExV, allocRegion.getArgument(0)));
+
+ fir::ExtendedValue localExV = symExV.match(
+ [&](const fir::ArrayBoxValue &box) -> fir::ExtendedValue {
+ auto idxTy = firOpBuilder.getIndexType();
+ llvm::SmallVector<mlir::Value> extents;
+ llvm::SmallVector<mlir::Value> lBounds;
+
+ for (unsigned dim = 0; dim < box.getExtents().size(); ++dim) {
+ mlir::Value dimVal =
+ firOpBuilder.createIntegerConstant(symLoc, idxTy, dim);
+ fir::BoxDimsOp dimInfo = firOpBuilder.create<fir::BoxDimsOp>(
+ symLoc, idxTy, idxTy, idxTy, allocRegion.getArgument(0),
+ dimVal);
+ extents.push_back(dimInfo.getExtent());
+ lBounds.push_back(dimInfo.getLowerBound());
+ }
+
+ return fir::ArrayBoxValue(allocRegion.getArgument(0), extents,
+ lBounds);
+ },
+ [&](const auto &box) -> fir::ExtendedValue {
+ return fir::substBase(symExV, allocRegion.getArgument(0));
+ });
+
+ symTable->addSymbol(*sym, localExV);
symTable->pushScope();
cloneSymbol(sym);
firOpBuilder.create<mlir::omp::YieldOp>(
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-array.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-array.f90
new file mode 100644
index 00000000000000..47e163014fe868
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-array.f90
@@ -0,0 +1,67 @@
+! Test delayed privatization for allocatable arrays.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN: FileCheck %s
+
+subroutine delayed_privatization_private(var1, l1)
+ implicit none
+ integer(8):: l1
+ integer, allocatable, dimension(:) :: var1
+
+!$omp parallel firstprivate(var1)
+ var1(l1 + 1) = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<!fir.array<\?xi32>>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<{{\?}}xi32>>> {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"}
+
+! CHECK-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]]
+! CHECK-NEXT: %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]]
+! CHECK-NEXT: %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]]
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT: %[[ALLOC_COND:.*]] = arith.cmpi ne, %[[PRIV_ARG_ADDR]], %[[C0]] : i64
+
+! CHECK-NEXT: fir.if %[[ALLOC_COND]] {
+! CHECK-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : [[TYPE]]
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : index
+! CHECK-NEXT: %[[DIMS:.*]]:3 = fir.box_dims %[[PRIV_ARG_VAL]], %[[C0]]
+! CHECK-NEXT: fir.box_addr %[[PRIV_ARG_VAL]]
+! CHECK-NEXT: %[[C0_2:.*]] = arith.constant 0 : index
+! CHECK-NEXT: %[[CMP:.*]] = arith.cmpi sgt, %[[DIMS]]#1, %[[C0_2]] : index
+! CHECK-NEXT: %[[SELECT:.*]] = arith.select %[[CMP]], %[[DIMS]]#1, %[[C0_2]] : index
+! CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[SELECT]]
+! CHECK-NEXT: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[DIMS]]#0, %[[SELECT]] : (index, index) -> !fir.shapeshift<1>
+! CHECK-NEXT: %[[EMBOX:.*]] = fir.embox %[[MEM]](%[[SHAPE_SHIFT]])
+! CHECK-NEXT: fir.store %[[EMBOX]] to %[[PRIV_ALLOC]]
+! CHECK-NEXT: } else {
+! CHECK-NEXT: %[[ZEROS:.*]] = fir.zero_bits
+! CHECK-NEXT: %[[C0_3:.*]] = arith.constant 0 : index
+! CHECK-NEXT: %[[SHAPE:.*]] = fir.shape %[[C0_3]] : (index) -> !fir.shape<1>
+! CHECK-NEXT: %[[EMBOX_2:.*]] = fir.embox %[[ZEROS]](%[[SHAPE]])
+! CHECK-NEXT: fir.store %[[EMBOX_2]] to %[[PRIV_ALLOC]]
+! CHECK-NEXT: }
+
+! CHECK-NEXT: hlfir.declare
+! CHECK-NEXT: omp.yield
+
+! CHECK-NEXT: } copy {
+! CHECK-NEXT: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+! CHECK-NEXT: %[[PRIV_BASE_VAL:.*]] = fir.load %[[PRIV_PRIV_ARG]]
+! CHECK-NEXT: %[[PRIV_BASE_BOX:.*]] = fir.box_addr %[[PRIV_BASE_VAL]]
+! CHECK-NEXT: %[[PRIV_BASE_ADDR:.*]] = fir.convert %[[PRIV_BASE_BOX]]
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT: %[[COPY_COND:.*]] = arith.cmpi ne, %[[PRIV_BASE_ADDR]], %[[C0]] : i64
+
+
+! CHECK-NEXT: fir.if %[[COPY_COND]] {
+! CHECK-NEXT: %[[PRIV_ORIG_ARG_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]]
+! CHECK-NEXT: hlfir.assign %[[PRIV_ORIG_ARG_VAL]] to %[[PRIV_BASE_VAL]] temporary_lhs
+! CHECK-NEXT: }
+! CHECK-NEXT: omp.yield
+! CHECK-NEXT: }
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-array.f90 b/flang/test/Lower/OpenMP/delayed-privatization-array.f90
new file mode 100644
index 00000000000000..23b61563781eb1
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-array.f90
@@ -0,0 +1,75 @@
+! Test delayed privatization for arrays.
+
+! RUN: split-file %s %t
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %t/one_dim_array.f90 2>&1 | FileCheck %s --check-prefix=ONE_DIM
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - \
+! RUN: %t/one_dim_array.f90 2>&1 | FileCheck %s --check-prefix=ONE_DIM
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %t/two_dim_array.f90 2>&1 | FileCheck %s --check-prefix=TWO_DIM
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %t/two_dim_array.f90 2>&1 |\
+! RUN: FileCheck %s --check-prefix=TWO_DIM
+
+!--- one_dim_array.f90
+subroutine delayed_privatization_private(var1, l1, u1)
+ implicit none
+ integer(8):: l1, u1
+ integer, dimension(l1:u1) :: var1
+
+!$omp parallel firstprivate(var1)
+ var1(l1 + 1) = 10
+!$omp end parallel
+end subroutine
+
+! ONE_DIM-LABEL: omp.private {type = firstprivate}
+! ONE_DIM-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.box<!fir.array<\?xi32>>]] alloc {
+
+! ONE_DIM-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! ONE_DIM-NEXT: %[[C0:.*]] = arith.constant 0 : index
+! ONE_DIM-NEXT: %[[DIMS:.*]]:3 = fir.box_dims %[[PRIV_ARG]], %[[C0]] : ([[TYPE]], index) -> (index, index, index)
+! ONE_DIM-NEXT: %[[PRIV_ALLOCA:.*]] = fir.alloca !fir.array<{{\?}}xi32>, %[[DIMS]]#1 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! ONE_DIM-NEXT: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[DIMS]]#0, %[[DIMS]]#1 : (index, index) -> !fir.shapeshift<1>
+! ONE_DIM-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOCA]](%[[SHAPE_SHIFT]]) {uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! ONE_DIM-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! ONE_DIM-NEXT: } copy {
+! ONE_DIM-NEXT: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+! ONE_DIM-NEXT: hlfir.assign %[[PRIV_ORIG_ARG]] to %[[PRIV_PRIV_ARG]] temporary_lhs
+! ONE_DIM-NEXT: omp.yield(%[[PRIV_PRIV_ARG]] : [[TYPE]])
+! ONE_DIM-NEXT: }
+
+!--- two_dim_array.f90
+subroutine delayed_privatization_private(var1, l1, u1, l2, u2)
+ implicit none
+ integer(8):: l1, u1, l2, u2
+ integer, dimension(l1:u1, l2:u2) :: var1
+
+!$omp parallel firstprivate(var1)
+ var1(l1 + 1, u2) = 10
+!$omp end parallel
+end subroutine
+
+! TWO_DIM-LABEL: omp.private {type = firstprivate}
+! TWO_DIM-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.box<!fir.array<\?x\?xi32>>]] alloc {
+
+! TWO_DIM-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+! TWO_DIM-NEXT: %[[C0:.*]] = arith.constant 0 : index
+! TWO_DIM-NEXT: %[[DIMS0:.*]]:3 = fir.box_dims %[[PRIV_ARG]], %[[C0]] : ([[TYPE]], index) -> (index, index, index)
+
+! TWO_DIM-NEXT: %[[C1:.*]] = arith.constant 1 : index
+! TWO_DIM-NEXT: %[[DIMS1:.*]]:3 = fir.box_dims %[[PRIV_ARG]], %[[C1]] : ([[TYPE]], index) -> (index, index, index)
+
+! TWO_DIM-NEXT: %[[PRIV_ALLOCA:.*]] = fir.alloca !fir.array<{{\?}}x{{\?}}xi32>, %[[DIMS0]]#1, %[[DIMS1]]#1 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! TWO_DIM-NEXT: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[DIMS0]]#0, %[[DIMS0]]#1, %[[DIMS1]]#0, %[[DIMS1]]#1 : (index, index, index, index) -> !fir.shapeshift<2>
+
+! TWO_DIM-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOCA]](%[[SHAPE_SHIFT]]) {uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! TWO_DIM-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! TWO_DIM-NEXT: } copy {
+! TWO_DIM-NEXT: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+! TWO_DIM-NEXT: hlfir.assign %[[PRIV_ORIG_ARG]] to %[[PRIV_PRIV_ARG]] temporary_lhs
+! TWO_DIM-NEXT: omp.yield(%[[PRIV_PRIV_ARG]] : [[TYPE]])
+! TWO_DIM-NEXT: }
More information about the flang-commits
mailing list