[flang-commits] [flang] [flang][OpenMP] Lower `target .. private(..)` to `omp.private` ops (PR #94195)
Kareem Ergawy via flang-commits
flang-commits at lists.llvm.org
Mon Jun 3 02:01:24 PDT 2024
https://github.com/ergawy created https://github.com/llvm/llvm-project/pull/94195
Extends delayed privatization support to `taraget .. private(..)`. With this PR, `private` is support for `target` **only** is delayed privatization mode.
>From cd6cce5a43b269d6cce67c71a0c4575a3ef0a5f9 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 3 Jun 2024 02:15:58 -0500
Subject: [PATCH] [flang][OpenMP] Lower `target .. private(..)` to
`omp.private` ops
Extends delayed privatization support to `taraget .. private(..)`. With
this PR, `private` is support for `target` **only** is delayed
privatization mode.
---
.../lib/Lower/OpenMP/DataSharingProcessor.cpp | 18 ++---
flang/lib/Lower/OpenMP/DataSharingProcessor.h | 18 ++---
flang/lib/Lower/OpenMP/OpenMP.cpp | 71 +++++++++++++++----
.../target-private-allocatable.f90 | 69 ++++++++++++++++++
.../target-private-simple.f90 | 38 ++++++++++
5 files changed, 181 insertions(+), 33 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/DelayedPrivatization/target-private-allocatable.f90
create mode 100644 flang/test/Lower/OpenMP/DelayedPrivatization/target-private-simple.f90
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 557a9685024c5..d8421b00cbd50 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -48,14 +48,13 @@ DataSharingProcessor::DataSharingProcessor(
}
void DataSharingProcessor::processStep1(
- mlir::omp::PrivateClauseOps *clauseOps,
- llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms) {
+ mlir::omp::PrivateClauseOps *clauseOps) {
collectSymbolsForPrivatization();
collectDefaultSymbols();
collectImplicitSymbols();
collectPreDeterminedSymbols();
- privatize(clauseOps, privateSyms);
+ privatize(clauseOps);
insertBarrier();
}
@@ -416,15 +415,14 @@ void DataSharingProcessor::collectPreDeterminedSymbols() {
}
void DataSharingProcessor::privatize(
- mlir::omp::PrivateClauseOps *clauseOps,
- llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms) {
+ mlir::omp::PrivateClauseOps *clauseOps) {
for (const semantics::Symbol *sym : allPrivatizedSymbols) {
if (const auto *commonDet =
sym->detailsIf<semantics::CommonBlockDetails>()) {
for (const auto &mem : commonDet->objects())
- doPrivatize(&*mem, clauseOps, privateSyms);
+ doPrivatize(&*mem, clauseOps);
} else
- doPrivatize(sym, clauseOps, privateSyms);
+ doPrivatize(sym, clauseOps);
}
}
@@ -442,8 +440,7 @@ void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) {
}
void DataSharingProcessor::doPrivatize(
- const semantics::Symbol *sym, mlir::omp::PrivateClauseOps *clauseOps,
- llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms) {
+ const semantics::Symbol *sym, mlir::omp::PrivateClauseOps *clauseOps) {
if (!useDelayedPrivatization) {
cloneSymbol(sym);
copyFirstPrivateSymbol(sym);
@@ -548,9 +545,6 @@ void DataSharingProcessor::doPrivatize(
clauseOps->privateVars.push_back(hsb.getAddr());
}
- if (privateSyms)
- privateSyms->push_back(sym);
-
symToPrivatizer[sym] = privatizerOp;
}
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
index 80a956de35ba9..fa2f87eb8d764 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
@@ -103,18 +103,15 @@ class DataSharingProcessor {
void collectDefaultSymbols();
void collectImplicitSymbols();
void collectPreDeterminedSymbols();
- void privatize(mlir::omp::PrivateClauseOps *clauseOps,
- llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
+ void privatize(mlir::omp::PrivateClauseOps *clauseOps);
void defaultPrivatize(
mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
void implicitPrivatize(
mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
- void
- doPrivatize(const semantics::Symbol *sym,
- mlir::omp::PrivateClauseOps *clauseOps,
- llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
+ void doPrivatize(const semantics::Symbol *sym,
+ mlir::omp::PrivateClauseOps *clauseOps);
void copyLastPrivatize(mlir::Operation *op);
void insertLastPrivateCompare(mlir::Operation *op);
void cloneSymbol(const semantics::Symbol *sym);
@@ -145,15 +142,18 @@ class DataSharingProcessor {
// Step2 performs the copying for lastprivates and requires knowledge of the
// MLIR operation to insert the last private update. Step2 adds
// dealocation code as well.
- void processStep1(
- mlir::omp::PrivateClauseOps *clauseOps = nullptr,
- llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms = nullptr);
+ void processStep1(mlir::omp::PrivateClauseOps *clauseOps = nullptr);
void processStep2(mlir::Operation *op, bool isLoop);
void setLoopIV(mlir::Value iv) {
assert(!loopIV && "Loop iteration variable already set");
loopIV = iv;
}
+
+ const llvm::SetVector<const semantics::Symbol *> &
+ getAllSymbolsToPrivatize() const {
+ return allPrivatizedSymbols;
+ }
};
} // namespace omp
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 9598457d123cf..06ec2534119a1 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -758,15 +758,32 @@ genBodyOfTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
llvm::ArrayRef<const semantics::Symbol *> mapSyms,
llvm::ArrayRef<mlir::Location> mapSymLocs,
llvm::ArrayRef<mlir::Type> mapSymTypes,
+ DataSharingProcessor &dsp,
const mlir::Location ¤tLocation,
const ConstructQueue &queue, ConstructQueue::iterator item) {
assert(mapSymTypes.size() == mapSymLocs.size());
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::Region ®ion = targetOp.getRegion();
-
- auto *regionBlock =
- firOpBuilder.createBlock(®ion, {}, mapSymTypes, mapSymLocs);
+ mlir::OperandRange privateVars = targetOp.getPrivateVars();
+
+ llvm::SmallVector<mlir::Type> allRegionArgTypes;
+ allRegionArgTypes.reserve(mapSymTypes.size() + targetOp.getPrivateVars().size());
+ llvm::transform(mapSymTypes, std::back_inserter(allRegionArgTypes),
+ [](mlir::Type t) { return t; });
+ llvm::transform(privateVars, std::back_inserter(allRegionArgTypes),
+ [](mlir::Value v) { return v.getType(); });
+
+ llvm::SmallVector<mlir::Location> allRegionArgLocs;
+ allRegionArgTypes.reserve(mapSymTypes.size() +
+ targetOp.getPrivateVars().size());
+ llvm::transform(mapSymLocs, std::back_inserter(allRegionArgLocs),
+ [](mlir::Location l) { return l; });
+ llvm::transform(privateVars, std::back_inserter(allRegionArgLocs),
+ [](mlir::Value v) { return v.getLoc(); });
+
+ auto *regionBlock = firOpBuilder.createBlock(®ion, {}, allRegionArgTypes,
+ allRegionArgLocs);
// Clones the `bounds` placing them inside the target region and returns them.
auto cloneBound = [&](mlir::Value bound) {
@@ -830,6 +847,20 @@ genBodyOfTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
});
}
+ for (auto [argIndex, argSymbol] :
+ llvm::enumerate(dsp.getAllSymbolsToPrivatize())) {
+ argIndex = mapSyms.size() + argIndex;
+
+ const mlir::BlockArgument &arg = region.getArgument(argIndex);
+ converter.bindSymbol(*argSymbol,
+ hlfir::translateToExtendedValue(
+ currentLocation, firOpBuilder, hlfir::Entity{arg},
+ /*contiguousHint=*/
+ evaluate::IsSimplyContiguous(
+ *argSymbol, converter.getFoldingContext()))
+ .first);
+ }
+
// Check if cloning the bounds introduced any dependency on the outer region.
// If so, then either clone them as well if they are MemoryEffectFree, or else
// copy them to a new temporary and add them to the map and block_argument
@@ -907,6 +938,8 @@ genBodyOfTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
} else {
genNestedEvaluations(converter, eval);
}
+
+ dsp.processStep2(targetOp, /*isLoop=*/false);
}
template <typename OpTy, typename... Args>
@@ -1048,15 +1081,18 @@ static void genTargetClauses(
devicePtrSyms);
cp.processMap(loc, stmtCtx, clauseOps, &mapSyms, &mapLocs, &mapTypes);
cp.processThreadLimit(stmtCtx, clauseOps);
- // TODO Support delayed privatization.
if (processHostOnlyClauses)
cp.processNowait(clauseOps);
cp.processTODO<clause::Allocate, clause::Defaultmap, clause::Firstprivate,
- clause::InReduction, clause::Private, clause::Reduction,
+ clause::InReduction, clause::Reduction,
clause::UsesAllocators>(loc,
llvm::omp::Directive::OMPD_target);
+
+ // `target private(..)` is only supported in delayed privatization mode.
+ if(!enableDelayedPrivatization)
+ cp.processTODO<clause::Private>(loc, llvm::omp::Directive::OMPD_target);
}
static void genTargetDataClauses(
@@ -1289,7 +1325,6 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
lower::StatementContext stmtCtx;
mlir::omp::ParallelClauseOps clauseOps;
- llvm::SmallVector<const semantics::Symbol *> privateSyms;
llvm::SmallVector<mlir::Type> reductionTypes;
llvm::SmallVector<const semantics::Symbol *> reductionSyms;
genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
@@ -1319,7 +1354,7 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
/*useDelayedPrivatization=*/true, &symTable);
if (privatize)
- dsp.processStep1(&clauseOps, &privateSyms);
+ dsp.processStep1(&clauseOps);
auto genRegionEntryCB = [&](mlir::Operation *op) {
auto parallelOp = llvm::cast<mlir::omp::ParallelOp>(op);
@@ -1344,9 +1379,10 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
privateVarLocs);
llvm::SmallVector<const semantics::Symbol *> allSymbols = reductionSyms;
- allSymbols.append(privateSyms);
+ allSymbols.append(dsp.getAllSymbolsToPrivatize().begin(),
+ dsp.getAllSymbolsToPrivatize().end());
+
for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) {
- fir::ExtendedValue hostExV = converter.getSymbolExtendedValue(*arg);
converter.bindSymbol(*arg, hlfir::translateToExtendedValue(
loc, firOpBuilder, hlfir::Entity{prv},
/*contiguousHint=*/
@@ -1541,11 +1577,22 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
deviceAddrLocs, deviceAddrTypes, devicePtrSyms,
devicePtrLocs, devicePtrTypes);
+ llvm::SmallVector<const semantics::Symbol *> privateSyms;
+ DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
+ /*shouldCollectPreDeterminedSymbols=*/
+ lower::omp::isLastItemInQueue(item, queue),
+ /*useDelayedPrivatization=*/true, &symTable);
+ dsp.processStep1(&clauseOps);
+
// 5.8.1 Implicit Data-Mapping Attribute Rules
// The following code follows the implicit data-mapping rules to map all the
- // symbols used inside the region that have not been explicitly mapped using
- // the map clause.
+ // symbols used inside the region that do not have explicit data-environment
+ // attribute clauses (neither data-sharing; e.g. `private`, nor `map`
+ // clauses).
auto captureImplicitMap = [&](const semantics::Symbol &sym) {
+ if (dsp.getAllSymbolsToPrivatize().contains(&sym))
+ return;
+
if (llvm::find(mapSyms, &sym) == mapSyms.end()) {
mlir::Value baseOp = converter.getSymbolAddress(sym);
if (!baseOp)
@@ -1632,7 +1679,7 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, mapSyms,
- mapLocs, mapTypes, loc, queue, item);
+ mapLocs, mapTypes, dsp, loc, queue, item);
return targetOp;
}
diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-allocatable.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-allocatable.f90
new file mode 100644
index 0000000000000..5131aed29e9de
--- /dev/null
+++ b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-allocatable.f90
@@ -0,0 +1,69 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 \
+! RUN: | FileCheck %s
+
+subroutine target_allocatable
+ implicit none
+ integer, allocatable :: alloc_var
+
+ !$omp target private(alloc_var)
+ alloc_var = 10
+ !$omp end target
+end subroutine target_allocatable
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[VAR_PRIVATIZER_SYM:.*]] :
+! CHECK-SAME: [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {
+! CHECK: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+! CHECK: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>> {bindc_name = "alloc_var", {{.*}}}
+
+! CHECK-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT: %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK-NEXT: %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]] : (!fir.heap<i32>) -> i64
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT: %[[ALLOC_COND:.*]] = arith.cmpi ne, %[[PRIV_ARG_ADDR]], %[[C0]] : i64
+
+! CHECK-NEXT: fir.if %[[ALLOC_COND]] {
+! CHECK: %[[PRIV_ALLOCMEM:.*]] = fir.allocmem i32 {fir.must_be_heap = true, {{.*}}}
+! CHECK-NEXT: %[[PRIV_ALLOCMEM_BOX:.*]] = fir.embox %[[PRIV_ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK-NEXT: fir.store %[[PRIV_ALLOCMEM_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT: } else {
+! CHECK-NEXT: %[[ZERO_BITS:.*]] = fir.zero_bits !fir.heap<i32>
+! CHECK-NEXT: %[[ZERO_BOX:.*]] = fir.embox %[[ZERO_BITS]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK-NEXT: fir.store %[[ZERO_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT: }
+
+! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
+! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! CHECK-NEXT: } dealloc {
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT: %[[PRIV_VAL:.*]] = fir.load %[[PRIV_ARG]]
+! CHECK-NEXT: %[[PRIV_ADDR:.*]] = fir.box_addr %[[PRIV_VAL]]
+! CHECK-NEXT: %[[PRIV_ADDR_I64:.*]] = fir.convert %[[PRIV_ADDR]]
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT: %[[PRIV_NULL_COND:.*]] = arith.cmpi ne, %[[PRIV_ADDR_I64]], %[[C0]] : i64
+
+! CHECK-NEXT: fir.if %[[PRIV_NULL_COND]] {
+! CHECK: %[[PRIV_VAL_2:.*]] = fir.load %[[PRIV_ARG]]
+! CHECK-NEXT: %[[PRIV_ADDR_2:.*]] = fir.box_addr %[[PRIV_VAL_2]]
+! CHECK-NEXT: fir.freemem %[[PRIV_ADDR_2]]
+! CHECK-NEXT: %[[ZEROS:.*]] = fir.zero_bits
+! CHECK-NEXT: %[[ZEROS_BOX:.*]] = fir.embox %[[ZEROS]]
+! CHECK-NEXT: fir.store %[[ZEROS_BOX]] to %[[PRIV_ARG]]
+! CHECK-NEXT: }
+
+! CHECK-NEXT: omp.yield
+! CHECK-NEXT: }
+
+
+! CHECK-LABEL: func.func @_QPtarget_allocatable() {
+
+! CHECK: %[[VAR_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>>
+! CHECK-SAME: {bindc_name = "alloc_var", {{.*}}}
+! CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_ALLOC]]
+
+! CHECK: omp.target private(
+! CHECK-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %{{.*}} : [[TYPE]]) {
diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-simple.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-simple.f90
new file mode 100644
index 0000000000000..951fdcb810e5c
--- /dev/null
+++ b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-simple.f90
@@ -0,0 +1,38 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 \
+! RUN: | FileCheck %s
+
+subroutine target_simple
+ implicit none
+ integer :: simple_var
+
+ !$omp target private(simple_var)
+ simple_var = 10
+ !$omp end target
+end subroutine target_simple
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[VAR_PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+! CHECK: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref<i32>):
+! CHECK: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "simple_var", {{.*}}}
+! CHECK: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
+! CHECK: omp.yield(%[[PRIV_DECL]]#0 : !fir.ref<i32>)
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPtarget_simple() {
+! CHECK: %[[VAR_ALLOC:.*]] = fir.alloca i32 {bindc_name = "simple_var", {{.*}}}
+! CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_ALLOC]]
+
+! CHECK: omp.target private(
+! CHECK-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK: ^bb0(%[[REG_ARG:.*]]: !fir.ref<i32>):
+! CHECK: %[[REG_DECL:.*]]:2 = hlfir.declare %[[REG_ARG]]
+! CHECK: %[[C10:.*]] = arith.constant 10
+! CHECK: hlfir.assign %[[C10]] to %[[REG_DECL]]#0
+! CHECK: omp.terminator
+! CHECK: }
+
+! CHECK: return
+! CHECK: }
+
More information about the flang-commits
mailing list