[flang-commits] [openmp] [flang] [flang][OpenMP] Add support for `target ... private` (PR #78968)
Kareem Ergawy via flang-commits
flang-commits at lists.llvm.org
Mon Jan 22 05:39:30 PST 2024
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/78968
>From a57b8d33e606370471c2de2cf979dcb5d2e34a6f Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 22 Jan 2024 06:19:22 -0600
Subject: [PATCH] [flang][OpenMP] Add support for `target ... private`
Adds support for the `private` clause in the `target` directive. In
order to support that, the `DataSharingProcessor` was also restructured
in order to separate the collection of prviate symbols from their actual
privatization code-gen.
The commit adds both a code-gen and an offloading tests.
---
flang/lib/Lower/OpenMP.cpp | 81 +++++++++++++------
flang/test/Lower/OpenMP/target_private.f90 | 30 +++++++
.../offloading/fortran/target_private.f90 | 29 +++++++
3 files changed, 117 insertions(+), 23 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/target_private.f90
create mode 100644 openmp/libomptarget/test/offloading/fortran/target_private.f90
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 7dd25f75d9eb76f..35202d2a8fdc6bf 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -143,15 +143,17 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
//===----------------------------------------------------------------------===//
class DataSharingProcessor {
+ using SymbolSet = llvm::SetVector<const Fortran::semantics::Symbol *>;
+
bool hasLastPrivateOp;
mlir::OpBuilder::InsertPoint lastPrivIP;
mlir::OpBuilder::InsertPoint insPt;
mlir::Value loopIV;
// Symbols in private, firstprivate, and/or lastprivate clauses.
- llvm::SetVector<const Fortran::semantics::Symbol *> privatizedSymbols;
- llvm::SetVector<const Fortran::semantics::Symbol *> defaultSymbols;
- llvm::SetVector<const Fortran::semantics::Symbol *> symbolsInNestedRegions;
- llvm::SetVector<const Fortran::semantics::Symbol *> symbolsInParentRegions;
+ SymbolSet privatizedSymbols;
+ SymbolSet defaultSymbols;
+ SymbolSet symbolsInNestedRegions;
+ SymbolSet symbolsInParentRegions;
Fortran::lower::AbstractConverter &converter;
fir::FirOpBuilder &firOpBuilder;
const Fortran::parser::OmpClauseList &opClauseList;
@@ -182,35 +184,54 @@ class DataSharingProcessor {
: hasLastPrivateOp(false), converter(converter),
firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList),
eval(eval) {}
- // Privatisation is split into two steps.
- // Step1 performs cloning of all privatisation clauses and copying for
- // firstprivates. Step1 is performed at the place where process/processStep1
+ // Privatisation is split into 3 steps:
+ //
+ // * Step1: collects all symbols that should be privatized.
+ //
+ // * Step2: performs cloning of all privatisation clauses and copying for
+ // firstprivates. Step2 is performed at the place where process/processStep2
// is called. This is usually inside the Operation corresponding to the OpenMP
- // construct, for looping constructs this is just before the Operation. The
- // split into two steps was performed basically to be able to call
- // privatisation for looping constructs before the operation is created since
- // the bounds of the MLIR OpenMP operation can be privatised.
- // Step2 performs the copying for lastprivates and requires knowledge of the
- // MLIR operation to insert the last private update. Step2 adds
+ // construct, for looping constructs this is just before the Operation.
+ //
+ // * Step3: performs the copying for lastprivates and requires knowledge of
+ // the MLIR operation to insert the last private update. Step3 adds
// dealocation code as well.
+ //
+ // The split was performed for the following reasons:
+ //
+ // 1. Step1 was split so that the `target` op knows which symbols should not
+ // be mapped into the target region due to being `private`. The implicit
+ // mapping happens before the op body is generated so we need to to collect
+ // the private symbols first and then later in the body actually privatize
+ // them.
+ //
+ // 2. Step2 was split in order to call privatisation for looping constructs
+ // before the operation is created since the bounds of the MLIR OpenMP
+ // operation can be privatised.
void processStep1();
- void processStep2(mlir::Operation *op, bool isLoop);
+ void processStep2();
+ void processStep3(mlir::Operation *op, bool isLoop);
void setLoopIV(mlir::Value iv) {
assert(!loopIV && "Loop iteration variable already set");
loopIV = iv;
}
+
+ const SymbolSet &getPrivatizedSymbols() const { return privatizedSymbols; }
};
void DataSharingProcessor::processStep1() {
collectSymbolsForPrivatization();
collectDefaultSymbols();
+}
+
+void DataSharingProcessor::processStep2() {
privatize();
defaultPrivatize();
insertBarrier();
}
-void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
+void DataSharingProcessor::processStep3(mlir::Operation *op, bool isLoop) {
insPt = firOpBuilder.saveInsertionPoint();
copyLastPrivatize(op);
firOpBuilder.restoreInsertionPoint(insPt);
@@ -2306,11 +2327,12 @@ static void createBodyOfOp(
if (!dsp) {
DataSharingProcessor proc(converter, *clauses, eval);
proc.processStep1();
- proc.processStep2(op, isLoop);
+ proc.processStep2();
+ proc.processStep3(op, isLoop);
} else {
if (isLoop && args.size() > 0)
dsp->setLoopIV(converter.getSymbolAddress(*args[0]));
- dsp->processStep2(op, isLoop);
+ dsp->processStep3(op, isLoop);
}
if (storeOp)
@@ -2648,7 +2670,9 @@ static void genBodyOfTargetOp(
const llvm::SmallVector<mlir::Type> &mapSymTypes,
const llvm::SmallVector<mlir::Location> &mapSymLocs,
const llvm::SmallVector<const Fortran::semantics::Symbol *> &mapSymbols,
- const mlir::Location ¤tLocation) {
+ const mlir::Location ¤tLocation,
+ const Fortran::parser::OmpClauseList &clauseList,
+ DataSharingProcessor &dsp) {
assert(mapSymTypes.size() == mapSymLocs.size());
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
@@ -2657,6 +2681,8 @@ static void genBodyOfTargetOp(
auto *regionBlock =
firOpBuilder.createBlock(®ion, {}, mapSymTypes, mapSymLocs);
+ dsp.processStep2();
+
// Clones the `bounds` placing them inside the target region and returns them.
auto cloneBound = [&](mlir::Value bound) {
if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
@@ -2811,8 +2837,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
cp.processNowait(nowaitAttr);
cp.processMap(currentLocation, directive, semanticsContext, stmtCtx,
mapOperands, &mapSymTypes, &mapSymLocs, &mapSymbols);
- cp.processTODO<Fortran::parser::OmpClause::Private,
- Fortran::parser::OmpClause::Depend,
+ cp.processTODO<Fortran::parser::OmpClause::Depend,
Fortran::parser::OmpClause::Firstprivate,
Fortran::parser::OmpClause::IsDevicePtr,
Fortran::parser::OmpClause::HasDeviceAddr,
@@ -2823,11 +2848,19 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
Fortran::parser::OmpClause::Defaultmap>(
currentLocation, llvm::omp::Directive::OMPD_target);
+ DataSharingProcessor dsp(converter, clauseList, eval);
+ dsp.processStep1();
+
// 5.8.1 Implicit Data-Mapping Attribute Rules
// The following code follows the implicit data-mapping rules to map all the
- // symbols used inside the region that have not been explicitly mapped using
- // the map clause.
+ // symbols used inside the region that do not have explicit data-environment
+ // attribute clauses (neither data-sharing; e.g. `private`, nor `map`
+ // clauses).
auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) {
+ if (dsp.getPrivatizedSymbols().contains(&sym)) {
+ return;
+ }
+
if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) {
mlir::Value baseOp = converter.getSymbolAddress(sym);
if (!baseOp)
@@ -2893,7 +2926,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
nowaitAttr, mapOperands);
genBodyOfTargetOp(converter, eval, genNested, targetOp, mapSymTypes,
- mapSymLocs, mapSymbols, currentLocation);
+ mapSymLocs, mapSymbols, currentLocation, clauseList, dsp);
return targetOp;
}
@@ -3127,6 +3160,7 @@ createSimdLoop(Fortran::lower::AbstractConverter &converter,
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
DataSharingProcessor dsp(converter, loopOpClauseList, eval);
dsp.processStep1();
+ dsp.processStep2();
Fortran::lower::StatementContext stmtCtx;
mlir::Value scheduleChunkClauseOperand, ifClauseOperand;
@@ -3179,6 +3213,7 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter,
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
DataSharingProcessor dsp(converter, beginClauseList, eval);
dsp.processStep1();
+ dsp.processStep2();
Fortran::lower::StatementContext stmtCtx;
mlir::Value scheduleChunkClauseOperand;
diff --git a/flang/test/Lower/OpenMP/target_private.f90 b/flang/test/Lower/OpenMP/target_private.f90
new file mode 100644
index 000000000000000..98e3b79d035dfd8
--- /dev/null
+++ b/flang/test/Lower/OpenMP/target_private.f90
@@ -0,0 +1,30 @@
+!Test data-sharing attribute clauses for the `target` directive.
+
+!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+!CHECK-LABEL: func.func @_QPomp_target_private()
+subroutine omp_target_private
+ implicit none
+ integer :: x(1)
+
+!$omp target private(x)
+ x(1) = 42
+!$omp end target
+!CHECK: omp.target {
+!CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
+!CHECK-DAG: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.array<1xi32> {bindc_name = "x",
+!CHECK-SAME: pinned, uniq_name = "_QFomp_target_privateEx"}
+!CHECK-NEXT: %[[SHAPE:.*]] = fir.shape %[[C1]] : (index) -> !fir.shape<1>
+!CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]](%[[SHAPE]])
+!CHECK-SAME: {uniq_name = "_QFomp_target_privateEx"} :
+!CHECK-SAME: (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) ->
+!CHECK-SAME: (!fir.ref<!fir.array<1xi32>>, !fir.ref<!fir.array<1xi32>>)
+!CHECK-DAG: %[[C42:.*]] = arith.constant 42 : i32
+!CHECK-DAG: %[[C1_2:.*]] = arith.constant 1 : index
+!CHECK-NEXT: %[[PRIV_BINDING:.*]] = hlfir.designate %[[PRIV_DECL]]#0 (%[[C1_2]])
+!CHECK-SAME: : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
+!CHECK-NEXT: hlfir.assign %[[C42]] to %[[PRIV_BINDING]] : i32, !fir.ref<i32>
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
+
+end subroutine omp_target_private
diff --git a/openmp/libomptarget/test/offloading/fortran/target_private.f90 b/openmp/libomptarget/test/offloading/fortran/target_private.f90
new file mode 100644
index 000000000000000..486c23ec2ec8d20
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/target_private.f90
@@ -0,0 +1,29 @@
+! Basic offloading test with a target region
+! REQUIRES: flang
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_INFO=16 %libomptarget-run-generic 2>&1 | %fcheck-generic
+program target_update
+ implicit none
+ integer :: x(1)
+ integer :: y(1)
+
+ x(1) = 42
+
+!$omp target private(x) map(tofrom: y)
+ x(1) = 84
+ y(1) = x(1)
+!$omp end target
+
+ print *, "x =", x(1)
+ print *, "y =", y(1)
+
+end program target_update
+! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}}
+! CHECK: x = 42
+! CHECK: y = 84
More information about the flang-commits
mailing list