[flang-commits] [flang] [mlir] [flang][OpenMP][MLIR] Basic support for delayed privatization code-gen (PR #81833)

Fri Feb 16 03:10:50 PST 2024

================
@@ -2617,30 +2730,99 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
   auto reductionCallback = [&](mlir::Operation *op) {
     llvm::SmallVector<mlir::Location> locs(reductionVars.size(),
                                            currentLocation);
-    auto block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {},
-                                                         reductionTypes, locs);
+    auto *block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {},
+                                                          reductionTypes, locs);
     for (auto [arg, prv] :
          llvm::zip_equal(reductionSymbols, block->getArguments())) {
       converter.bindSymbol(*arg, prv);
     }
     return reductionSymbols;
   };
 
-  return genOpWithBody<mlir::omp::ParallelOp>(
+  OpWithBodyGenInfo genInfo =
       OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
           .setGenNested(genNested)
           .setOuterCombined(outerCombined)
           .setClauses(&clauseList)
           .setReductions(&reductionSymbols, &reductionTypes)
-          .setGenRegionEntryCb(reductionCallback),
+          .setGenRegionEntryCb(reductionCallback);
+
+  if (!enableDelayedPrivatization) {
+    return genOpWithBody<mlir::omp::ParallelOp>(
+        genInfo,
+        /*resultTypes=*/mlir::TypeRange(), ifClauseOperand,
+        numThreadsClauseOperand, allocateOperands, allocatorOperands,
+        reductionVars,
+        reductionDeclSymbols.empty()
+            ? nullptr
+            : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
+                                   reductionDeclSymbols),
+        procBindKindAttr, /*private_vars=*/llvm::SmallVector<mlir::Value>{},
+        /*privatizers=*/nullptr);
+  }
+
+  bool privatize = !outerCombined;
+  DataSharingProcessor dsp(converter, clauseList, eval,
+                           /*useDelayedPrivatization=*/true, &symTable);
+
+  if (privatize)
+    dsp.processStep1();
+
+  const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo();
+
+  auto genRegionEntryCB = [&](mlir::Operation *op) {
+    auto parallelOp = llvm::cast<mlir::omp::ParallelOp>(op);
+
+    llvm::SmallVector<mlir::Location> reductionLocs(reductionVars.size(),
+                                                    currentLocation);
+
+    auto privateVars = parallelOp.getPrivateVars();
+    auto &region = parallelOp.getRegion();
+
+    llvm::SmallVector<mlir::Type> privateVarTypes = reductionTypes;
+    privateVarTypes.reserve(privateVars.size());
+    llvm::transform(privateVars, std::back_inserter(privateVarTypes),
+                    [](mlir::Value v) { return v.getType(); });
+
+    llvm::SmallVector<mlir::Location> privateVarLocs = reductionLocs;
+    privateVarLocs.reserve(privateVars.size());
+    llvm::transform(privateVars, std::back_inserter(privateVarLocs),
+                    [](mlir::Value v) { return v.getLoc(); });
----------------
skatrak wrote:

I think this is fine at the moment, but maybe we should start thinking about how to better match block arguments to their different uses, both when the block is created and later when the arguments are used. At this point we only have to deal with privatization- and reduction-related block arguments at the same time, but other operations also use block arguments for loop induction variables or target data movements.

It can become quite difficult to keep track of the expected ordering and uses of block arguments for each operation if we implement that handling ad hoc for every operation, so maybe there is something we could define at the MLIR operation level to centralize that logic.

https://github.com/llvm/llvm-project/pull/81833