[flang-commits] [flang] [flang][MLIR][OpenMP] Extend delayed privatization for scalar allocatables (PR #84740)
Kareem Ergawy via flang-commits
flang-commits at lists.llvm.org
Tue Mar 12 01:02:53 PDT 2024
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/84740
>From 6c67544327b6e2ee236beae6f94e29a520d68860 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 11 Mar 2024 04:39:51 -0500
Subject: [PATCH 1/2] [flang][MLIR][OpenMP] Extend delayed privatization for
scalar allocatables
One more step in extending support for delayed privatization. This diff
adds support for scalar allocatables.
---
.../flang/Optimizer/Builder/BoxValue.h | 11 +++++
flang/lib/Lower/Bridge.cpp | 24 ++++++-----
.../lib/Lower/OpenMP/DataSharingProcessor.cpp | 8 ++--
...privatization-allocatable-firstprivate.f90 | 33 +++++++++++++++
...ayed-privatization-allocatable-private.f90 | 40 +++++++++++++++++++
5 files changed, 103 insertions(+), 13 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
diff --git a/flang/include/flang/Optimizer/Builder/BoxValue.h b/flang/include/flang/Optimizer/Builder/BoxValue.h
index 2fed2d48a7a080..9b84bfb5ab0bf7 100644
--- a/flang/include/flang/Optimizer/Builder/BoxValue.h
+++ b/flang/include/flang/Optimizer/Builder/BoxValue.h
@@ -535,6 +535,17 @@ class ExtendedValue : public details::matcher<ExtendedValue> {
const VT &matchee() const { return box; }
+ /// Clone an ExtendedValue to a new instance changing the base address.
+ ///
+ /// TODO So far, this is only a shallow clone; only the base address is
+ /// replaced. This will probably be extended to implement deep cloning to
+ /// support scenarios such as delayed privatization for ArrayBoxValue's.
+ ExtendedValue clone(mlir::Value newBase) const {
+ return match(
+ [&](const UnboxedValue &box) -> ExtendedValue { return newBase; },
+ [&](const auto &box) -> ExtendedValue { return box.clone(newBase); });
+ }
+
private:
VT box;
};
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a668ba4116faab..5fa914d087f020 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -748,7 +748,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
void copyVar(mlir::Location loc, mlir::Value dst,
mlir::Value src) override final {
- copyVarHLFIR(loc, dst, src);
+ copyVarHLFIR(loc, Fortran::lower::SymbolBox::Intrinsic{dst},
+ Fortran::lower::SymbolBox::Intrinsic{src});
}
void copyHostAssociateVar(
@@ -1009,10 +1010,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
// `omp.private`'s `alloc` block. If this is the case, we return this
// `SymbolBox::Intrinsic` value.
if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym))
- return v.match(
- [&](const Fortran::lower::SymbolBox::Intrinsic &)
- -> Fortran::lower::SymbolBox { return v; },
- [](const auto &) -> Fortran::lower::SymbolBox { return {}; });
+ return v;
return {};
}
@@ -1060,15 +1058,16 @@ class FirConverter : public Fortran::lower::AbstractConverter {
const Fortran::lower::SymbolBox &rhs_sb) {
mlir::Location loc = genLocation(sym.name());
if (lowerToHighLevelFIR())
- copyVarHLFIR(loc, lhs_sb.getAddr(), rhs_sb.getAddr());
+ copyVarHLFIR(loc, lhs_sb, rhs_sb);
else
copyVarFIR(loc, sym, lhs_sb, rhs_sb);
}
- void copyVarHLFIR(mlir::Location loc, mlir::Value dst, mlir::Value src) {
+ void copyVarHLFIR(mlir::Location loc, Fortran::lower::SymbolBox dst,
+ Fortran::lower::SymbolBox src) {
assert(lowerToHighLevelFIR());
- hlfir::Entity lhs{dst};
- hlfir::Entity rhs{src};
+ hlfir::Entity lhs{dst.getAddr()};
+ hlfir::Entity rhs{src.getAddr()};
// Temporary_lhs is set to true in hlfir.assign below to avoid user
// assignment to be used and finalization to be called on the LHS.
// This may or may not be correct but mimics the current behaviour
@@ -1082,7 +1081,12 @@ class FirConverter : public Fortran::lower::AbstractConverter {
/*keepLhsLengthInAllocatableAssignment=*/false,
/*temporary_lhs=*/true);
};
- if (lhs.isAllocatable()) {
+
+ bool isBoxAllocatable = dst.match(
+ [](const fir::MutableBoxValue &box) { return box.isAllocatable(); },
+ [](const auto &box) { return false; });
+
+ if (isBoxAllocatable) {
// Deep copy allocatable if it is allocated.
// Note that when allocated, the RHS is already allocated with the LHS
// shape for copy on entry in createHostAssociateVarClone.
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 717b8cc0276a30..bee533949c6564 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -50,6 +50,7 @@ void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
}
void DataSharingProcessor::insertDeallocs() {
+ // TODO Extend delayed privatization to include a `dealloc` region?
for (const Fortran::semantics::Symbol *sym : privatizedSymbols)
if (Fortran::semantics::IsAllocatable(sym->GetUltimate())) {
converter.createHostAssociateVarCloneDealloc(*sym);
@@ -376,6 +377,7 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
symLoc, uniquePrivatizerName, symType,
isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate
: mlir::omp::DataSharingClauseType::Private);
+ fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym);
symTable->pushScope();
@@ -386,7 +388,7 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
&allocRegion, /*insertPt=*/{}, symType, symLoc);
firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
- symTable->addSymbol(*sym, allocRegion.getArgument(0));
+ symTable->addSymbol(*sym, symExV.clone(allocRegion.getArgument(0)));
symTable->pushScope();
cloneSymbol(sym);
firOpBuilder.create<mlir::omp::YieldOp>(
@@ -403,10 +405,10 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
mlir::Block *copyEntryBlock = firOpBuilder.createBlock(
©Region, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc});
firOpBuilder.setInsertionPointToEnd(copyEntryBlock);
- symTable->addSymbol(*sym, copyRegion.getArgument(0),
+ symTable->addSymbol(*sym, symExV.clone(copyRegion.getArgument(0)),
/*force=*/true);
symTable->pushScope();
- symTable->addSymbol(*sym, copyRegion.getArgument(1));
+ symTable->addSymbol(*sym, symExV.clone(copyRegion.getArgument(1)));
auto ip = firOpBuilder.saveInsertionPoint();
copyFirstPrivateSymbol(sym, &ip);
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
new file mode 100644
index 00000000000000..545221b4c1abfe
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
@@ -0,0 +1,33 @@
+! Test delayed privatization for allocatables: `firstprivate`.
+
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_allocatable
+ implicit none
+ integer, allocatable :: var1
+
+!$omp parallel firstprivate(var1)
+ var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK: } copy {
+! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT: %[[PRIV_BASE_VAL:.*]] = fir.load %[[PRIV_PRIV_ARG]]
+! CHECK-NEXT: %[[PRIV_BASE_BOX:.*]] = fir.box_addr %[[PRIV_BASE_VAL]]
+! CHECK-NEXT: %[[PRIV_BASE_ADDR:.*]] = fir.convert %[[PRIV_BASE_BOX]]
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT: %[[COPY_COND:.*]] = arith.cmpi ne, %[[PRIV_BASE_ADDR]], %[[C0]] : i64
+
+! CHECK-NEXT: fir.if %[[COPY_COND]] {
+! CHECK-NEXT: %[[ORIG_BASE_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]]
+! CHECK-NEXT: %[[ORIG_BASE_ADDR:.*]] = fir.box_addr %[[ORIG_BASE_VAL]]
+! CHECK-NEXT: %[[ORIG_BASE_LD:.*]] = fir.load %[[ORIG_BASE_ADDR]]
+! CHECK-NEXT: hlfir.assign %[[ORIG_BASE_LD]] to %[[PRIV_BASE_BOX]] temporary_lhs
+! CHECK-NEXT: }
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
new file mode 100644
index 00000000000000..83fb516879ff20
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
@@ -0,0 +1,40 @@
+! Test delayed privatization for allocatables: `private`.
+
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_allocatable
+ implicit none
+ integer, allocatable :: var1
+
+!$omp parallel private(var1)
+ var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>> {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_allocatableEvar1"}
+
+! CHECK-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT: %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK-NEXT: %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]] : (!fir.heap<i32>) -> i64
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT: %[[ALLOC_COND:.*]] = arith.cmpi ne, %[[PRIV_ARG_ADDR]], %[[C0]] : i64
+
+! CHECK-NEXT: fir.if %[[ALLOC_COND]] {
+! CHECK-NEXT: %[[PRIV_ALLOCMEM:.*]] = fir.allocmem i32 {fir.must_be_heap = true, uniq_name = "_QFdelayed_privatization_allocatableEvar1.alloc"}
+! CHECK-NEXT: %[[PRIV_ALLOCMEM_BOX:.*]] = fir.embox %[[PRIV_ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK-NEXT: fir.store %[[PRIV_ALLOCMEM_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT: } else {
+! CHECK-NEXT: %[[ZERO_BITS:.*]] = fir.zero_bits !fir.heap<i32>
+! CHECK-NEXT: %[[ZERO_BOX:.*]] = fir.embox %[[ZERO_BITS]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK-NEXT: fir.store %[[ZERO_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT: }
+
+! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
+! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! CHECK-NEXT: }
>From e6a7bd80afd95d96b6f145f9b5d50999504a705c Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Tue, 12 Mar 2024 03:02:33 -0500
Subject: [PATCH 2/2] Handle review comments.
---
flang/include/flang/Optimizer/Builder/BoxValue.h | 11 -----------
flang/lib/Lower/OpenMP/DataSharingProcessor.cpp | 9 +++++----
2 files changed, 5 insertions(+), 15 deletions(-)
diff --git a/flang/include/flang/Optimizer/Builder/BoxValue.h b/flang/include/flang/Optimizer/Builder/BoxValue.h
index 9b84bfb5ab0bf7..2fed2d48a7a080 100644
--- a/flang/include/flang/Optimizer/Builder/BoxValue.h
+++ b/flang/include/flang/Optimizer/Builder/BoxValue.h
@@ -535,17 +535,6 @@ class ExtendedValue : public details::matcher<ExtendedValue> {
const VT &matchee() const { return box; }
- /// Clone an ExtendedValue to a new instance changing the base address.
- ///
- /// TODO So far, this is only a shallow clone; only the base address is
- /// replaced. This will probably be extended to implement deep cloning to
- /// support scenarios such as delayed privatization for ArrayBoxValue's.
- ExtendedValue clone(mlir::Value newBase) const {
- return match(
- [&](const UnboxedValue &box) -> ExtendedValue { return newBase; },
- [&](const auto &box) -> ExtendedValue { return box.clone(newBase); });
- }
-
private:
VT box;
};
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index bee533949c6564..4b2d0dbb43ed57 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -50,7 +50,7 @@ void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
}
void DataSharingProcessor::insertDeallocs() {
- // TODO Extend delayed privatization to include a `dealloc` region?
+ // TODO Extend delayed privatization to include a `dealloc` region.
for (const Fortran::semantics::Symbol *sym : privatizedSymbols)
if (Fortran::semantics::IsAllocatable(sym->GetUltimate())) {
converter.createHostAssociateVarCloneDealloc(*sym);
@@ -388,7 +388,8 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
&allocRegion, /*insertPt=*/{}, symType, symLoc);
firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
- symTable->addSymbol(*sym, symExV.clone(allocRegion.getArgument(0)));
+ symTable->addSymbol(*sym,
+ fir::substBase(symExV, allocRegion.getArgument(0)));
symTable->pushScope();
cloneSymbol(sym);
firOpBuilder.create<mlir::omp::YieldOp>(
@@ -405,10 +406,10 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
mlir::Block *copyEntryBlock = firOpBuilder.createBlock(
©Region, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc});
firOpBuilder.setInsertionPointToEnd(copyEntryBlock);
- symTable->addSymbol(*sym, symExV.clone(copyRegion.getArgument(0)),
+ symTable->addSymbol(*sym, fir::substBase(symExV, copyRegion.getArgument(0)),
/*force=*/true);
symTable->pushScope();
- symTable->addSymbol(*sym, symExV.clone(copyRegion.getArgument(1)));
+ symTable->addSymbol(*sym, fir::substBase(symExV, copyRegion.getArgument(1)));
auto ip = firOpBuilder.saveInsertionPoint();
copyFirstPrivateSymbol(sym, &ip);
More information about the flang-commits
mailing list