[flang-commits] [flang] [mlir] [flang][OpenMP] Delayed privatization MLIR lowering support for `distribute` (PR #109632)

Kareem Ergawy via flang-commits flang-commits at lists.llvm.org
Mon Sep 23 01:18:24 PDT 2024


https://github.com/ergawy created https://github.com/llvm/llvm-project/pull/109632

Starts delayed privatizaiton support for standalone `distribute` directives. Other flavours of `distribute` are still TODO as well as MLIR to LLVM IR lowering.

>From dd2880af560839c4d0a008ea8d383da30a04a770 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Fri, 20 Sep 2024 08:12:12 -0500
Subject: [PATCH] [flang][OpenMP] Delayed privatization MLIR lowering support
 for `distribute`

Starts delayed privatizaiton support for standalone `distribute`
directives. Other flavours of `distribute` are still TODO as well as
MLIR to LLVM IR lowering.
---
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 86 +++++++++++++------
 .../distribute-standalone-private.f90         | 41 +++++++++
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 14 ++-
 3 files changed, 105 insertions(+), 36 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/DelayedPrivatization/distribute-standalone-private.f90

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 960286732c90c2..42a3eb9c149d69 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -485,6 +485,33 @@ mergePrivateVarsInfo(OMPOp op, llvm::ArrayRef<InfoTy> currentList,
                   infoAccessor);
 }
 
+static void
+bindSymbolsToRegionArgs(lower::AbstractConverter &converter, mlir::Location loc,
+                        llvm::ArrayRef<const semantics::Symbol *> symbols,
+                        mlir::Region &region, unsigned regionBeginArgIdx) {
+  assert(regionBeginArgIdx + symbols.size() <= region.getNumArguments());
+  for (const semantics::Symbol *arg : symbols) {
+    auto bind = [&](const semantics::Symbol *sym) {
+      mlir::BlockArgument blockArg = region.getArgument(regionBeginArgIdx);
+      ++regionBeginArgIdx;
+      converter.bindSymbol(
+          *sym,
+          hlfir::translateToExtendedValue(
+              loc, converter.getFirOpBuilder(), hlfir::Entity{blockArg},
+              /*contiguousHint=*/
+              evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext()))
+              .first);
+    };
+
+    if (const auto *commonDet =
+            arg->detailsIf<semantics::CommonBlockDetails>()) {
+      for (const auto &mem : commonDet->objects())
+        bind(&*mem);
+    } else
+      bind(arg);
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Op body generation helper structures and functions
 //===----------------------------------------------------------------------===//
@@ -1493,28 +1520,7 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
     llvm::SmallVector<const semantics::Symbol *> allSymbols(reductionSyms);
     allSymbols.append(dsp->getDelayedPrivSymbols().begin(),
                       dsp->getDelayedPrivSymbols().end());
-
-    unsigned argIdx = 0;
-    for (const semantics::Symbol *arg : allSymbols) {
-      auto bind = [&](const semantics::Symbol *sym) {
-        mlir::BlockArgument blockArg = region.getArgument(argIdx);
-        ++argIdx;
-        converter.bindSymbol(*sym,
-                             hlfir::translateToExtendedValue(
-                                 loc, firOpBuilder, hlfir::Entity{blockArg},
-                                 /*contiguousHint=*/
-                                 evaluate::IsSimplyContiguous(
-                                     *sym, converter.getFoldingContext()))
-                                 .first);
-      };
-
-      if (const auto *commonDet =
-              arg->detailsIf<semantics::CommonBlockDetails>()) {
-        for (const auto &mem : commonDet->objects())
-          bind(&*mem);
-      } else
-        bind(arg);
-    }
+    bindSymbolsToRegionArgs(converter, loc, allSymbols, region, 0);
 
     return allSymbols;
   };
@@ -1681,7 +1687,6 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
                    mapTypes, deviceAddrSyms, deviceAddrLocs, deviceAddrTypes,
                    devicePtrSyms, devicePtrLocs, devicePtrTypes);
 
-  llvm::SmallVector<const semantics::Symbol *> privateSyms;
   DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                            /*shouldCollectPreDeterminedSymbols=*/
                            lower::omp::isLastItemInQueue(item, queue),
@@ -1932,22 +1937,49 @@ static void genStandaloneDistribute(lower::AbstractConverter &converter,
                                     ConstructQueue::const_iterator item) {
   lower::StatementContext stmtCtx;
 
+  auto teamsOp = mlir::cast<mlir::omp::TeamsOp>(
+      converter.getFirOpBuilder().getInsertionBlock()->getParentOp());
   mlir::omp::DistributeOperands distributeClauseOps;
   genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
                        distributeClauseOps);
 
-  // TODO: Support delayed privatization.
+  // Privatization for a `distribute` directive is done in the `teams` region to
+  // which the directive binds. Therefore, all privatization logic (delayed as
+  // well as early) happens **before** the `distribute` op is generated (i.e.
+  // inside the parent `teams` op).
   DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                            /*shouldCollectPreDeterminedSymbols=*/true,
-                           /*useDelayedPrivatization=*/false, &symTable);
-  dsp.processStep1();
+                           enableDelayedPrivatizationStaging, &symTable);
+  mlir::omp::PrivateClauseOps privateClauseOps;
+  dsp.processStep1(&privateClauseOps);
+
+  if (enableDelayedPrivatizationStaging) {
+    mlir::Region &teamsRegion = teamsOp.getRegion();
+    unsigned privateVarsArgIdx = teamsRegion.getNumArguments();
+    llvm::SmallVector<mlir::Type> privateVarTypes;
+    llvm::SmallVector<mlir::Location> privateVarLocs;
+
+    for (mlir::Value privateVar : privateClauseOps.privateVars) {
+      privateVarTypes.push_back(privateVar.getType());
+      privateVarLocs.push_back(privateVar.getLoc());
+      teamsOp.getPrivateVarsMutable().append(privateVar);
+    }
+
+    teamsOp.setPrivateSymsAttr(
+        converter.getFirOpBuilder().getArrayAttr(privateClauseOps.privateSyms));
+    teamsRegion.addArguments(privateVarTypes, privateVarLocs);
+
+    llvm::ArrayRef<const semantics::Symbol *> delayedPrivSyms =
+        dsp.getDelayedPrivSymbols();
+    bindSymbolsToRegionArgs(converter, loc, delayedPrivSyms, teamsRegion,
+                            privateVarsArgIdx);
+  }
 
   mlir::omp::LoopNestOperands loopNestClauseOps;
   llvm::SmallVector<const semantics::Symbol *> iv;
   genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
                      loopNestClauseOps, iv);
 
-  // TODO: Populate entry block arguments with private variables.
   auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
       converter, loc, distributeClauseOps, /*blockArgTypes=*/{});
 
diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/distribute-standalone-private.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/distribute-standalone-private.f90
new file mode 100644
index 00000000000000..ffb4001eb19630
--- /dev/null
+++ b/flang/test/Lower/OpenMP/DelayedPrivatization/distribute-standalone-private.f90
@@ -0,0 +1,41 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization-staging \
+! RUN:   -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization-staging -o - %s 2>&1 \
+! RUN:   | FileCheck %s
+
+subroutine standalone_distribute
+    implicit none
+    integer :: simple_var, i
+
+    !$omp teams
+    !$omp distribute private(simple_var)
+    do i = 1, 10
+      simple_var = simple_var + i
+    end do
+    !$omp end distribute
+    !$omp end teams
+end subroutine standalone_distribute
+
+! CHECK: omp.private {type = private} @[[I_PRIVATIZER_SYM:.*]] : !fir.ref<i32>
+! CHECK: omp.private {type = private} @[[VAR_PRIVATIZER_SYM:.*]] : !fir.ref<i32>
+
+
+! CHECK-LABEL: func.func @_QPstandalone_distribute() {
+! CHECK:         %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFstandalone_distributeEi"}
+! CHECK:         %[[VAR_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFstandalone_distributeEsimple_var"}
+! CHECK:         omp.teams
+! CHECK-SAME:      private(@[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %[[VAR_ARG:.*]] : !fir.ref<i32>,
+! CHECK-SAME:              @[[I_PRIVATIZER_SYM]] %[[I_DECL]]#0 -> %[[I_ARG:.*]] : !fir.ref<i32>) {
+! CHECK:           %[[VAR_PRIV_DECL:.*]]:2 = hlfir.declare %[[VAR_ARG]]
+! CHECK:           %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_ARG]]
+! CHECK:           omp.distribute {
+! CHECK:             omp.loop_nest {{.*}} {
+! CHECK:               fir.store %{{.*}} to %[[I_PRIV_DECL]]#1 : !fir.ref<i32>
+! CHECK:               %{{.*}} = fir.load %[[VAR_PRIV_DECL]]#0 : !fir.ref<i32>
+! CHECK:               %{{.*}} = fir.load %[[I_PRIV_DECL]]#0 : !fir.ref<i32>
+! CHECK:               arith.addi %{{.*}}, %{{.*}} : i32
+! CHECK:               hlfir.assign %{{.*}} to %[[VAR_PRIV_DECL]]#0 : i32, !fir.ref<i32>
+! CHECK:             }
+! CHECK:           }
+! CHECK:         }
+! CHECK:       }
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index db47276dcefe95..b35853cdffb2e5 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1225,17 +1225,13 @@ parsePrivateList(OpAsmParser &parser,
 }
 
 static void printPrivateList(OpAsmPrinter &p, Operation *op,
-                             ValueRange privateVars, TypeRange privateTypes,
-                             ArrayAttr privateSyms) {
-  // TODO: Remove target-specific logic from this function.
-  auto targetOp = mlir::dyn_cast<mlir::omp::TargetOp>(op);
-  assert(targetOp);
-
+                             Operation::operand_range privateVars,
+                             TypeRange privateTypes, ArrayAttr privateSyms) {
   auto &region = op->getRegion(0);
   auto *argsBegin = region.front().getArguments().begin();
-  MutableArrayRef argsSubrange(argsBegin + targetOp.getMapVars().size(),
-                               argsBegin + targetOp.getMapVars().size() +
-                                   privateTypes.size());
+  MutableArrayRef argsSubrange(argsBegin + privateVars.getBeginOperandIndex(),
+                               argsBegin + privateVars.getBeginOperandIndex() +
+                                   privateVars.size());
   mlir::SmallVector<bool> isByRefVec;
   isByRefVec.resize(privateTypes.size(), false);
   DenseBoolArrayAttr isByRef =



More information about the flang-commits mailing list