[Openmp-commits] [openmp] [flang] [flang][OpenMP] Add support for `target ... private` (PR #78968)

Kareem Ergawy via Openmp-commits openmp-commits at lists.llvm.org
Mon Jan 22 05:39:30 PST 2024


https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/78968

>From a57b8d33e606370471c2de2cf979dcb5d2e34a6f Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 22 Jan 2024 06:19:22 -0600
Subject: [PATCH] [flang][OpenMP] Add support for `target ... private`

Adds support for the `private` clause in the `target` directive. In
order to support that, the `DataSharingProcessor` was also restructured
in order to separate the collection of prviate symbols from their actual
privatization code-gen.

The commit adds both a code-gen and an offloading tests.
---
 flang/lib/Lower/OpenMP.cpp                    | 81 +++++++++++++------
 flang/test/Lower/OpenMP/target_private.f90    | 30 +++++++
 .../offloading/fortran/target_private.f90     | 29 +++++++
 3 files changed, 117 insertions(+), 23 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/target_private.f90
 create mode 100644 openmp/libomptarget/test/offloading/fortran/target_private.f90

diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 7dd25f75d9eb76f..35202d2a8fdc6bf 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -143,15 +143,17 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
 //===----------------------------------------------------------------------===//
 
 class DataSharingProcessor {
+  using SymbolSet = llvm::SetVector<const Fortran::semantics::Symbol *>;
+
   bool hasLastPrivateOp;
   mlir::OpBuilder::InsertPoint lastPrivIP;
   mlir::OpBuilder::InsertPoint insPt;
   mlir::Value loopIV;
   // Symbols in private, firstprivate, and/or lastprivate clauses.
-  llvm::SetVector<const Fortran::semantics::Symbol *> privatizedSymbols;
-  llvm::SetVector<const Fortran::semantics::Symbol *> defaultSymbols;
-  llvm::SetVector<const Fortran::semantics::Symbol *> symbolsInNestedRegions;
-  llvm::SetVector<const Fortran::semantics::Symbol *> symbolsInParentRegions;
+  SymbolSet privatizedSymbols;
+  SymbolSet defaultSymbols;
+  SymbolSet symbolsInNestedRegions;
+  SymbolSet symbolsInParentRegions;
   Fortran::lower::AbstractConverter &converter;
   fir::FirOpBuilder &firOpBuilder;
   const Fortran::parser::OmpClauseList &opClauseList;
@@ -182,35 +184,54 @@ class DataSharingProcessor {
       : hasLastPrivateOp(false), converter(converter),
         firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList),
         eval(eval) {}
-  // Privatisation is split into two steps.
-  // Step1 performs cloning of all privatisation clauses and copying for
-  // firstprivates. Step1 is performed at the place where process/processStep1
+  // Privatisation is split into 3 steps:
+  //
+  // * Step1: collects all symbols that should be privatized.
+  //
+  // * Step2: performs cloning of all privatisation clauses and copying for
+  // firstprivates. Step2 is performed at the place where process/processStep2
   // is called. This is usually inside the Operation corresponding to the OpenMP
-  // construct, for looping constructs this is just before the Operation. The
-  // split into two steps was performed basically to be able to call
-  // privatisation for looping constructs before the operation is created since
-  // the bounds of the MLIR OpenMP operation can be privatised.
-  // Step2 performs the copying for lastprivates and requires knowledge of the
-  // MLIR operation to insert the last private update. Step2 adds
+  // construct, for looping constructs this is just before the Operation.
+  //
+  // * Step3: performs the copying for lastprivates and requires knowledge of
+  // the MLIR operation to insert the last private update. Step3 adds
   // dealocation code as well.
+  //
+  // The split was performed for the following reasons:
+  //
+  // 1. Step1 was split so that the `target` op knows which symbols should not
+  // be mapped into the target region due to being `private`. The implicit
+  // mapping happens before the op body is generated so we need to to collect
+  // the private symbols first and then later in the body actually privatize
+  // them.
+  //
+  // 2. Step2 was split in order to call privatisation for looping constructs
+  // before the operation is created since the bounds of the MLIR OpenMP
+  // operation can be privatised.
   void processStep1();
-  void processStep2(mlir::Operation *op, bool isLoop);
+  void processStep2();
+  void processStep3(mlir::Operation *op, bool isLoop);
 
   void setLoopIV(mlir::Value iv) {
     assert(!loopIV && "Loop iteration variable already set");
     loopIV = iv;
   }
+
+  const SymbolSet &getPrivatizedSymbols() const { return privatizedSymbols; }
 };
 
 void DataSharingProcessor::processStep1() {
   collectSymbolsForPrivatization();
   collectDefaultSymbols();
+}
+
+void DataSharingProcessor::processStep2() {
   privatize();
   defaultPrivatize();
   insertBarrier();
 }
 
-void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
+void DataSharingProcessor::processStep3(mlir::Operation *op, bool isLoop) {
   insPt = firOpBuilder.saveInsertionPoint();
   copyLastPrivatize(op);
   firOpBuilder.restoreInsertionPoint(insPt);
@@ -2306,11 +2327,12 @@ static void createBodyOfOp(
     if (!dsp) {
       DataSharingProcessor proc(converter, *clauses, eval);
       proc.processStep1();
-      proc.processStep2(op, isLoop);
+      proc.processStep2();
+      proc.processStep3(op, isLoop);
     } else {
       if (isLoop && args.size() > 0)
         dsp->setLoopIV(converter.getSymbolAddress(*args[0]));
-      dsp->processStep2(op, isLoop);
+      dsp->processStep3(op, isLoop);
     }
 
     if (storeOp)
@@ -2648,7 +2670,9 @@ static void genBodyOfTargetOp(
     const llvm::SmallVector<mlir::Type> &mapSymTypes,
     const llvm::SmallVector<mlir::Location> &mapSymLocs,
     const llvm::SmallVector<const Fortran::semantics::Symbol *> &mapSymbols,
-    const mlir::Location &currentLocation) {
+    const mlir::Location &currentLocation,
+    const Fortran::parser::OmpClauseList &clauseList,
+    DataSharingProcessor &dsp) {
   assert(mapSymTypes.size() == mapSymLocs.size());
 
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
@@ -2657,6 +2681,8 @@ static void genBodyOfTargetOp(
   auto *regionBlock =
       firOpBuilder.createBlock(&region, {}, mapSymTypes, mapSymLocs);
 
+  dsp.processStep2();
+
   // Clones the `bounds` placing them inside the target region and returns them.
   auto cloneBound = [&](mlir::Value bound) {
     if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
@@ -2811,8 +2837,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
   cp.processNowait(nowaitAttr);
   cp.processMap(currentLocation, directive, semanticsContext, stmtCtx,
                 mapOperands, &mapSymTypes, &mapSymLocs, &mapSymbols);
-  cp.processTODO<Fortran::parser::OmpClause::Private,
-                 Fortran::parser::OmpClause::Depend,
+  cp.processTODO<Fortran::parser::OmpClause::Depend,
                  Fortran::parser::OmpClause::Firstprivate,
                  Fortran::parser::OmpClause::IsDevicePtr,
                  Fortran::parser::OmpClause::HasDeviceAddr,
@@ -2823,11 +2848,19 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
                  Fortran::parser::OmpClause::Defaultmap>(
       currentLocation, llvm::omp::Directive::OMPD_target);
 
+  DataSharingProcessor dsp(converter, clauseList, eval);
+  dsp.processStep1();
+
   // 5.8.1 Implicit Data-Mapping Attribute Rules
   // The following code follows the implicit data-mapping rules to map all the
-  // symbols used inside the region that have not been explicitly mapped using
-  // the map clause.
+  // symbols used inside the region that do not have explicit data-environment
+  // attribute clauses (neither data-sharing; e.g. `private`, nor `map`
+  // clauses).
   auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) {
+    if (dsp.getPrivatizedSymbols().contains(&sym)) {
+      return;
+    }
+
     if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) {
       mlir::Value baseOp = converter.getSymbolAddress(sym);
       if (!baseOp)
@@ -2893,7 +2926,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
       nowaitAttr, mapOperands);
 
   genBodyOfTargetOp(converter, eval, genNested, targetOp, mapSymTypes,
-                    mapSymLocs, mapSymbols, currentLocation);
+                    mapSymLocs, mapSymbols, currentLocation, clauseList, dsp);
 
   return targetOp;
 }
@@ -3127,6 +3160,7 @@ createSimdLoop(Fortran::lower::AbstractConverter &converter,
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   DataSharingProcessor dsp(converter, loopOpClauseList, eval);
   dsp.processStep1();
+  dsp.processStep2();
 
   Fortran::lower::StatementContext stmtCtx;
   mlir::Value scheduleChunkClauseOperand, ifClauseOperand;
@@ -3179,6 +3213,7 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter,
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   DataSharingProcessor dsp(converter, beginClauseList, eval);
   dsp.processStep1();
+  dsp.processStep2();
 
   Fortran::lower::StatementContext stmtCtx;
   mlir::Value scheduleChunkClauseOperand;
diff --git a/flang/test/Lower/OpenMP/target_private.f90 b/flang/test/Lower/OpenMP/target_private.f90
new file mode 100644
index 000000000000000..98e3b79d035dfd8
--- /dev/null
+++ b/flang/test/Lower/OpenMP/target_private.f90
@@ -0,0 +1,30 @@
+!Test data-sharing attribute clauses for the `target` directive.
+
+!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+!CHECK-LABEL: func.func @_QPomp_target_private()
+subroutine omp_target_private
+    implicit none
+    integer :: x(1)
+
+!$omp target private(x)
+    x(1) = 42
+!$omp end target
+!CHECK: omp.target {
+!CHECK-DAG:    %[[C1:.*]] = arith.constant 1 : index
+!CHECK-DAG:    %[[PRIV_ALLOC:.*]] = fir.alloca !fir.array<1xi32> {bindc_name = "x",
+!CHECK-SAME:     pinned, uniq_name = "_QFomp_target_privateEx"}
+!CHECK-NEXT:   %[[SHAPE:.*]] = fir.shape %[[C1]] : (index) -> !fir.shape<1>
+!CHECK-NEXT:   %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]](%[[SHAPE]])
+!CHECK-SAME:     {uniq_name = "_QFomp_target_privateEx"} :
+!CHECK-SAME:     (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) ->
+!CHECK-SAME:     (!fir.ref<!fir.array<1xi32>>, !fir.ref<!fir.array<1xi32>>)
+!CHECK-DAG:    %[[C42:.*]] = arith.constant 42 : i32
+!CHECK-DAG:    %[[C1_2:.*]] = arith.constant 1 : index
+!CHECK-NEXT:   %[[PRIV_BINDING:.*]] = hlfir.designate %[[PRIV_DECL]]#0 (%[[C1_2]])
+!CHECK-SAME:     : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
+!CHECK-NEXT:   hlfir.assign %[[C42]] to %[[PRIV_BINDING]] : i32, !fir.ref<i32>
+!CHECK-NEXT:   omp.terminator
+!CHECK-NEXT: }
+
+end subroutine omp_target_private
diff --git a/openmp/libomptarget/test/offloading/fortran/target_private.f90 b/openmp/libomptarget/test/offloading/fortran/target_private.f90
new file mode 100644
index 000000000000000..486c23ec2ec8d20
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/target_private.f90
@@ -0,0 +1,29 @@
+! Basic offloading test with a target region
+! REQUIRES: flang
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_INFO=16 %libomptarget-run-generic 2>&1 | %fcheck-generic
+program target_update
+    implicit none
+    integer :: x(1)
+    integer :: y(1)
+
+    x(1) = 42
+
+!$omp target private(x) map(tofrom: y)
+    x(1) = 84
+    y(1) = x(1)
+!$omp end target
+
+    print *, "x =", x(1)
+    print *, "y =", y(1)
+
+end program target_update
+! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}}
+! CHECK: x = 42
+! CHECK: y = 84



More information about the Openmp-commits mailing list