[flang] [llvm] [WIP][PoC][flang] Re-use OpenMP data environment clauses for locality spec (PR #128148)

Kareem Ergawy via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 21 02:48:34 PST 2025


https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/128148

>From 4dd5222c0392d4f95b5e8d6ccbbb082987bec210 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 19 Feb 2025 05:21:26 -0600
Subject: [PATCH 1/6] [flang] Extend `fir.do_loop` to reuse OpenMP clause
 table-gen records

---
 .../flang/Optimizer/Dialect/CMakeLists.txt       |  4 ++--
 flang/include/flang/Optimizer/Dialect/FIROps.td  | 16 +++++++++++++---
 flang/lib/Optimizer/Dialect/FIROps.cpp           |  8 +++++---
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
index 73f388cbab6c9..da14fcd25a8d3 100644
--- a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
+++ b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
@@ -16,8 +16,8 @@ mlir_tablegen(FIRAttr.cpp.inc -gen-attrdef-defs)
 set(LLVM_TARGET_DEFINITIONS FIROps.td)
 mlir_tablegen(FIROps.h.inc -gen-op-decls)
 mlir_tablegen(FIROps.cpp.inc -gen-op-defs)
-mlir_tablegen(FIROpsTypes.h.inc --gen-typedef-decls)
-mlir_tablegen(FIROpsTypes.cpp.inc --gen-typedef-defs)
+mlir_tablegen(FIROpsTypes.h.inc --gen-typedef-decls -typedefs-dialect=fir)
+mlir_tablegen(FIROpsTypes.cpp.inc --gen-typedef-defs -typedefs-dialect=fir)
 add_public_tablegen_target(FIROpsIncGen)
 
 set(LLVM_TARGET_DEFINITIONS FortranVariableInterface.td)
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 8dbc9df9f553d..eab1ebbf16fb5 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -16,6 +16,7 @@
 
 include "mlir/Dialect/Arith/IR/ArithBase.td"
 include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td"
+include "mlir/Dialect/OpenMP/OpenMPClauses.td"
 include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td"
 include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
 include "flang/Optimizer/Dialect/FIRDialect.td"
@@ -2171,7 +2172,7 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
   let hasVerifier = 1;
   let hasCustomAssemblyFormat = 1;
 
-  let arguments = (ins
+  defvar opArgs = (ins
     Index:$lowerBound,
     Index:$upperBound,
     Index:$step,
@@ -2182,6 +2183,8 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
     OptionalAttr<ArrayAttr>:$reduceAttrs,
     OptionalAttr<LoopAnnotationAttr>:$loopAnnotation
   );
+
+  let arguments = !con(opArgs, OpenMP_PrivateClause.arguments);
   let results = (outs Variadic<AnyType>:$results);
   let regions = (region SizedRegion<1>:$region);
 
@@ -2193,10 +2196,13 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
       CArg<"mlir::ValueRange", "std::nullopt">:$iterArgs,
       CArg<"mlir::ValueRange", "std::nullopt">:$reduceOperands,
       CArg<"llvm::ArrayRef<mlir::Attribute>", "{}">:$reduceAttrs,
-      CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>
+      CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes,
+      CArg<"mlir::ValueRange", "std::nullopt">:$private_vars,
+      CArg<"mlir::ArrayRef<mlir::Attribute>", "{}">:$private_syms
+      )>
   ];
 
-  let extraClassDeclaration = [{
+  defvar opExtraClassDeclaration = [{
     mlir::Value getInductionVar() { return getBody()->getArgument(0); }
     mlir::OpBuilder getBodyBuilder() {
       return mlir::OpBuilder(getBody(), std::prev(getBody()->end()));
@@ -2258,6 +2264,10 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
                            unsigned resultNum);
     mlir::Value blockArgToSourceOp(unsigned blockArgNum);
   }];
+
+  let extraClassDeclaration =
+    !strconcat(opExtraClassDeclaration, "\n",
+               OpenMP_PrivateClause.extraClassDeclaration);
 }
 
 def fir_IfOp : region_Op<"if", [DeclareOpInterfaceMethods<RegionBranchOpInterface, [
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 7e50622db08c9..3e04327d67a00 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -2478,14 +2478,16 @@ void fir::DoLoopOp::build(mlir::OpBuilder &builder,
                           bool finalCountValue, mlir::ValueRange iterArgs,
                           mlir::ValueRange reduceOperands,
                           llvm::ArrayRef<mlir::Attribute> reduceAttrs,
-                          llvm::ArrayRef<mlir::NamedAttribute> attributes) {
+                          llvm::ArrayRef<mlir::NamedAttribute> attributes,
+                          mlir::ValueRange privateVars,
+                          mlir::ArrayRef<mlir::Attribute> privateSyms) {
   result.addOperands({lb, ub, step});
   result.addOperands(reduceOperands);
   result.addOperands(iterArgs);
   result.addAttribute(getOperandSegmentSizeAttr(),
                       builder.getDenseI32ArrayAttr(
                           {1, 1, 1, static_cast<int32_t>(reduceOperands.size()),
-                           static_cast<int32_t>(iterArgs.size())}));
+                           static_cast<int32_t>(iterArgs.size()), 0}));
   if (finalCountValue) {
     result.addTypes(builder.getIndexType());
     result.addAttribute(getFinalValueAttrName(result.name),
@@ -2591,7 +2593,7 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser,
   result.addAttribute(getOperandSegmentSizeAttr(),
                       builder.getDenseI32ArrayAttr(
                           {1, 1, 1, static_cast<int32_t>(reduceOperands.size()),
-                           static_cast<int32_t>(iterOperands.size())}));
+                           static_cast<int32_t>(iterOperands.size()), 0}));
 
   if (parser.parseOptionalAttrDictWithKeyword(result.attributes))
     return mlir::failure();

>From bb165a2f646e2887e0a6af1f915476c3cf87bf16 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 19 Feb 2025 05:57:57 -0600
Subject: [PATCH 2/6] [flang] Parsing and printing for `fir.do_loop` with
 `private` specifiers

---
 .../include/flang/Optimizer/Dialect/FIROps.td |  25 ++++-
 flang/lib/Optimizer/Dialect/FIROps.cpp        | 100 +++++++++++++++---
 2 files changed, 106 insertions(+), 19 deletions(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index eab1ebbf16fb5..03a10c03cc1b6 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -2203,20 +2203,37 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
   ];
 
   defvar opExtraClassDeclaration = [{
-    mlir::Value getInductionVar() { return getBody()->getArgument(0); }
     mlir::OpBuilder getBodyBuilder() {
       return mlir::OpBuilder(getBody(), std::prev(getBody()->end()));
     }
+
+    /// Region argument accessors.
+    mlir::Value getInductionVar() { return getBody()->getArgument(0); }
     mlir::Block::BlockArgListType getRegionIterArgs() {
-      return getBody()->getArguments().drop_front();
+      // 1 for skipping the induction variable.
+      return getBody()->getArguments().slice(1, getNumIterOperands());
     }
+    mlir::Block::BlockArgListType getRegionPrivateArgs() {
+     return getBody()->getArguments().slice(1 + getNumIterOperands(),
+                                            numPrivateBlockArgs());
+    }
+
+    /// Operation operand accessors.
     mlir::Operation::operand_range getIterOperands() {
       return getOperands()
-          .drop_front(getNumControlOperands() + getNumReduceOperands());
+          .slice(getNumControlOperands() + getNumReduceOperands(),
+                 getNumIterOperands());
     }
     llvm::MutableArrayRef<mlir::OpOperand> getInitsMutable() {
       return getOperation()->getOpOperands()
-          .drop_front(getNumControlOperands() + getNumReduceOperands());
+          .slice(getNumControlOperands() + getNumReduceOperands(),
+                 getNumIterOperands());
+    }
+    mlir::Operation::operand_range getPrivateOperands() {
+      return getOperands()
+          .slice(getNumControlOperands() + getNumReduceOperands()
+                                         + getNumIterOperands(),
+                 numPrivateBlockArgs());
     }
 
     void setLowerBound(mlir::Value bound) { (*this)->setOperand(0, bound); }
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 3e04327d67a00..c729414cd2393 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -2563,8 +2563,9 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser,
 
   // Parse the optional initial iteration arguments.
   llvm::SmallVector<mlir::OpAsmParser::Argument> regionArgs;
-  llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> iterOperands;
   llvm::SmallVector<mlir::Type> argTypes;
+
+  llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> iterOperands;
   bool prependCount = false;
   regionArgs.push_back(inductionVariable);
 
@@ -2589,15 +2590,6 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser,
     prependCount = true;
   }
 
-  // Set the operandSegmentSizes attribute
-  result.addAttribute(getOperandSegmentSizeAttr(),
-                      builder.getDenseI32ArrayAttr(
-                          {1, 1, 1, static_cast<int32_t>(reduceOperands.size()),
-                           static_cast<int32_t>(iterOperands.size()), 0}));
-
-  if (parser.parseOptionalAttrDictWithKeyword(result.attributes))
-    return mlir::failure();
-
   // Induction variable.
   if (prependCount)
     result.addAttribute(DoLoopOp::getFinalValueAttrName(result.name),
@@ -2606,15 +2598,77 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser,
     argTypes.push_back(indexType);
   // Loop carried variables
   argTypes.append(result.types.begin(), result.types.end());
-  // Parse the body region.
-  auto *body = result.addRegion();
+
   if (regionArgs.size() != argTypes.size())
     return parser.emitError(
         parser.getNameLoc(),
         "mismatch in number of loop-carried values and defined values");
+
+  llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> privateOperands;
+  if (succeeded(parser.parseOptionalKeyword("private"))) {
+    std::size_t oldArgTypesSize = argTypes.size();
+    if (failed(parser.parseLParen()))
+      return mlir::failure();
+
+    llvm::SmallVector<mlir::SymbolRefAttr> privateSymbolVec;
+    if (failed(parser.parseCommaSeparatedList([&]() {
+          if (failed(parser.parseAttribute(privateSymbolVec.emplace_back())))
+            return mlir::failure();
+
+          if (parser.parseOperand(privateOperands.emplace_back()) ||
+              parser.parseArrow() ||
+              parser.parseArgument(regionArgs.emplace_back()))
+            return mlir::failure();
+
+          return mlir::success();
+        })))
+      return mlir::failure();
+
+    if (failed(parser.parseColon()))
+      return mlir::failure();
+
+    if (failed(parser.parseCommaSeparatedList([&]() {
+          if (failed(parser.parseType(argTypes.emplace_back())))
+            return mlir::failure();
+
+          return mlir::success();
+        })))
+      return mlir::failure();
+
+    if (regionArgs.size() != argTypes.size())
+      return parser.emitError(parser.getNameLoc(),
+                              "mismatch in number of private arg and types");
+
+    if (failed(parser.parseRParen()))
+      return mlir::failure();
+
+    for (auto operandType : llvm::zip_equal(
+             privateOperands, llvm::drop_begin(argTypes, oldArgTypesSize)))
+      if (parser.resolveOperand(std::get<0>(operandType),
+                                std::get<1>(operandType), result.operands))
+        return mlir::failure();
+
+    llvm::SmallVector<mlir::Attribute> symbolAttrs(privateSymbolVec.begin(),
+                                                   privateSymbolVec.end());
+    result.addAttribute(getPrivateSymsAttrName(result.name),
+                        builder.getArrayAttr(symbolAttrs));
+  }
+
+  if (parser.parseOptionalAttrDictWithKeyword(result.attributes))
+    return mlir::failure();
+
+  // Set the operandSegmentSizes attribute
+  result.addAttribute(getOperandSegmentSizeAttr(),
+                      builder.getDenseI32ArrayAttr(
+                          {1, 1, 1, static_cast<int32_t>(reduceOperands.size()),
+                           static_cast<int32_t>(iterOperands.size()),
+                           static_cast<int32_t>(privateOperands.size())}));
+
   for (size_t i = 0, e = regionArgs.size(); i != e; ++i)
     regionArgs[i].type = argTypes[i];
 
+  // Parse the body region.
+  auto *body = result.addRegion();
   if (parser.parseRegion(*body, regionArgs))
     return mlir::failure();
 
@@ -2708,9 +2762,25 @@ void fir::DoLoopOp::print(mlir::OpAsmPrinter &p) {
     p << " -> " << getResultTypes();
     printBlockTerminators = true;
   }
-  p.printOptionalAttrDictWithKeyword(
-      (*this)->getAttrs(),
-      {"unordered", "finalValue", "reduceAttrs", "operandSegmentSizes"});
+
+  if (numPrivateBlockArgs() > 0) {
+    p << " private(";
+    llvm::interleaveComma(llvm::zip_equal(getPrivateSymsAttr(),
+                                          getPrivateVars(),
+                                          getRegionPrivateArgs()),
+                          p, [&](auto it) {
+                            p << std::get<0>(it) << " " << std::get<1>(it)
+                              << " -> " << std::get<2>(it);
+                          });
+    p << " : ";
+    llvm::interleaveComma(getPrivateVars(), p,
+                          [&](auto it) { p << it.getType(); });
+    p << ")";
+  }
+
+  p.printOptionalAttrDictWithKeyword((*this)->getAttrs(),
+                                     {"unordered", "finalValue", "reduceAttrs",
+                                      "operandSegmentSizes", "private_syms"});
   p << ' ';
   p.printRegion(getRegion(), /*printEntryBlockArgs=*/false,
                 printBlockTerminators);

>From c8cf5a644886bb8dd3ad19be6e3b916ffcbd222c Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Thu, 20 Feb 2025 03:25:32 -0600
Subject: [PATCH 3/6] [flang] Basic lowering of `fir.do_loop` locality
 specifiers

---
 .../include/flang/Optimizer/Dialect/FIROps.td |  8 +---
 .../Transforms/ControlFlowConverter.cpp       | 37 +++++++++++++++++++
 2 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 03a10c03cc1b6..34647263d6cc7 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -2229,12 +2229,6 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
           .slice(getNumControlOperands() + getNumReduceOperands(),
                  getNumIterOperands());
     }
-    mlir::Operation::operand_range getPrivateOperands() {
-      return getOperands()
-          .slice(getNumControlOperands() + getNumReduceOperands()
-                                         + getNumIterOperands(),
-                 numPrivateBlockArgs());
-    }
 
     void setLowerBound(mlir::Value bound) { (*this)->setOperand(0, bound); }
     void setUpperBound(mlir::Value bound) { (*this)->setOperand(1, bound); }
@@ -2242,7 +2236,7 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
 
     /// Number of region arguments for loop-carried values
     unsigned getNumRegionIterArgs() {
-      return getBody()->getNumArguments() - 1;
+      return getNumIterOperands();
     }
     /// Number of operands controlling the loop: lb, ub, step
     unsigned getNumControlOperands() { return 3; }
diff --git a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
index b09bbf6106dbb..d3eaf963f3667 100644
--- a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
+++ b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
@@ -32,6 +32,19 @@ using namespace fir;
 using namespace mlir;
 
 namespace {
+/// Starting at the operation `from`, looks up the PrivateClauseOp named
+/// `symbolName` in the nearest symbol table and returns it.
+///
+/// TODO Copied from OpenMPToLLVMIRTranslation.cpp, move to a shared location.
+/// Maybe a static function on the `PrivateClauseOp`.
+static omp::PrivateClauseOp findPrivatizer(Operation *from,
+                                           SymbolRefAttr symbolName) {
+  omp::PrivateClauseOp privatizer =
+      SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
+                                                                 symbolName);
+  assert(privatizer && "privatizer not found in the symbol table");
+  return privatizer;
+}
 
 // Conversion of fir control ops to more primitive control-flow.
 //
@@ -57,6 +70,30 @@ class CfgLoopConv : public mlir::OpRewritePattern<fir::DoLoopOp> {
     auto iofAttr = mlir::arith::IntegerOverflowFlagsAttr::get(
         rewriter.getContext(), flags);
 
+    // Handle privatization
+    if (!loop.getPrivateVars().empty()) {
+      mlir::OpBuilder::InsertionGuard guard(rewriter);
+      rewriter.setInsertionPointToStart(&loop.getRegion().front());
+
+      std::optional<ArrayAttr> privateSyms = loop.getPrivateSyms();
+
+      for (auto [privateVar, privateArg, privatizerSym] :
+           llvm::zip_equal(loop.getPrivateVars(), loop.getRegionPrivateArgs(),
+                           *privateSyms)) {
+        SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privatizerSym);
+        omp::PrivateClauseOp privatizer = findPrivatizer(loop, privatizerName);
+
+        mlir::Value localAlloc =
+            rewriter.create<fir::AllocaOp>(loop.getLoc(), privatizer.getType());
+        rewriter.replaceAllUsesWith(privateArg, localAlloc);
+      }
+
+      loop.getRegion().front().eraseArguments(1 + loop.getNumRegionIterArgs(),
+                                              loop.numPrivateBlockArgs());
+      loop.getPrivateVarsMutable().clear();
+      loop.setPrivateSymsAttr(nullptr);
+    }
+
     // Create the start and end blocks that will wrap the DoLoopOp with an
     // initalizer and an end point
     auto *initBlock = rewriter.getInsertionBlock();

>From ab60385e385f69e39e49efbd09a9cc82a67d6d9a Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Thu, 20 Feb 2025 05:21:10 -0600
Subject: [PATCH 4/6] [flang] Basic lowering of `fir.do_loop`'s `local_init`
 specifier

---
 .../Transforms/ControlFlowConverter.cpp       | 22 ++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
index d3eaf963f3667..88779e6ebd977 100644
--- a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
+++ b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
@@ -74,7 +74,6 @@ class CfgLoopConv : public mlir::OpRewritePattern<fir::DoLoopOp> {
     if (!loop.getPrivateVars().empty()) {
       mlir::OpBuilder::InsertionGuard guard(rewriter);
       rewriter.setInsertionPointToStart(&loop.getRegion().front());
-
       std::optional<ArrayAttr> privateSyms = loop.getPrivateSyms();
 
       for (auto [privateVar, privateArg, privatizerSym] :
@@ -85,6 +84,27 @@ class CfgLoopConv : public mlir::OpRewritePattern<fir::DoLoopOp> {
 
         mlir::Value localAlloc =
             rewriter.create<fir::AllocaOp>(loop.getLoc(), privatizer.getType());
+
+        if (privatizer.getDataSharingType() ==
+            omp::DataSharingClauseType::FirstPrivate) {
+          mlir::Block *beforeLocalInit = rewriter.getInsertionBlock();
+          mlir::Block *afterLocalInit = rewriter.splitBlock(
+              rewriter.getInsertionBlock(), rewriter.getInsertionPoint());
+          rewriter.cloneRegionBefore(privatizer.getCopyRegion(),
+                                     afterLocalInit);
+          mlir::Block* copyRegionFront = beforeLocalInit->getNextNode();
+          mlir::Block* copyRegionBack = afterLocalInit->getPrevNode();
+
+          rewriter.setInsertionPoint(beforeLocalInit, beforeLocalInit->end());
+          rewriter.create<mlir::cf::BranchOp>(
+              loc, copyRegionFront,
+              llvm::SmallVector<mlir::Value>{privateVar, privateArg});
+
+          rewriter.eraseOp(copyRegionBack->getTerminator());
+          rewriter.setInsertionPoint(copyRegionBack, copyRegionBack->end());
+          rewriter.create<mlir::cf::BranchOp>(loc, afterLocalInit);
+        }
+
         rewriter.replaceAllUsesWith(privateArg, localAlloc);
       }
 

>From b898f1dcb2c25076c310974596d0b90e1e02370a Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Fri, 21 Feb 2025 02:20:54 -0600
Subject: [PATCH 5/6] Add sample test

---
 do_loop_with_local_and_local_init.mlir | 49 ++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 do_loop_with_local_and_local_init.mlir

diff --git a/do_loop_with_local_and_local_init.mlir b/do_loop_with_local_and_local_init.mlir
new file mode 100644
index 0000000000000..06510b4433f1a
--- /dev/null
+++ b/do_loop_with_local_and_local_init.mlir
@@ -0,0 +1,49 @@
+// For testing:
+// 1. parsing/printing (roundtripping): `fir-opt do_loop_with_local_and_local_init.mlir -o roundtrip.mlir`
+// 2. Lowering locality specs during CFG: `fir-opt --cfg-conversion do_loop_with_local_and_local_init.mlir -o after_cfg_lowering.mlir`
+
+// TODO I will add both of the above steps as proper tests when the PoC is complete.
+module attributes {dlti.dl_spec = #dlti.dl_spec<i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 21.0.0 (/home/kaergawy/git/aomp20.0/llvm-project/flang c8cf5a644886bb8dd3ad19be6e3b916ffcbd222c)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+
+  omp.private {type = private} @local_privatizer : i32
+
+  omp.private {type = firstprivate} @local_init_privatizer : i32 copy {
+  ^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<i32>):
+      %0 = fir.load %arg0 : !fir.ref<i32>
+      fir.store %0 to %arg1 : !fir.ref<i32>
+      omp.yield(%arg1 : !fir.ref<i32>)
+  }
+
+  func.func @_QPomploop() {
+    %0 = fir.alloca i32 {bindc_name = "i"}
+    %1:2 = hlfir.declare %0 {uniq_name = "_QFomploopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %2 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomploopEi"}
+    %3:2 = hlfir.declare %2 {uniq_name = "_QFomploopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %4 = fir.alloca i32 {bindc_name = "local_init_var", uniq_name = "_QFomploopElocal_init_var"}
+    %5:2 = hlfir.declare %4 {uniq_name = "_QFomploopElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %6 = fir.alloca i32 {bindc_name = "local_var", uniq_name = "_QFomploopElocal_var"}
+    %7:2 = hlfir.declare %6 {uniq_name = "_QFomploopElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %c1_i32 = arith.constant 1 : i32
+    %8 = fir.convert %c1_i32 : (i32) -> index
+    %c10_i32 = arith.constant 10 : i32
+    %9 = fir.convert %c10_i32 : (i32) -> index
+    %c1 = arith.constant 1 : index
+    fir.do_loop %arg0 = %8 to %9 step %c1 unordered private(@local_privatizer %7#0 -> %arg1, @local_init_privatizer %5#0 -> %arg2 : !fir.ref<i32>, !fir.ref<i32>) {
+      %10 = fir.convert %arg0 : (index) -> i32
+      fir.store %10 to %1#1 : !fir.ref<i32>
+      %12:2 = hlfir.declare %arg1 {uniq_name = "_QFomploopElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+      %14:2 = hlfir.declare %arg2 {uniq_name = "_QFomploopElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+      %16 = fir.load %1#0 : !fir.ref<i32>
+      %c5_i32 = arith.constant 5 : i32
+      %17 = arith.cmpi slt, %16, %c5_i32 : i32
+      fir.if %17 {
+        %c42_i32 = arith.constant 42 : i32
+        hlfir.assign %c42_i32 to %12#0 : i32, !fir.ref<i32>
+      } else {
+        %c84_i32 = arith.constant 84 : i32
+        hlfir.assign %c84_i32 to %14#0 : i32, !fir.ref<i32>
+      }
+    }
+    return
+  }
+}

>From fc0a6385b31cb8038cd9a7ffe5a6d5715891cdad Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Thu, 20 Feb 2025 08:07:58 -0600
Subject: [PATCH 6/6] [flang] Basic PFT to MLIR lowering for `do concurrent`
 locality specifiers

---
 do_loop_with_local_and_local_init.f90         | 15 +++++
 flang/include/flang/Lower/AbstractConverter.h |  3 +
 flang/lib/Lower/Bridge.cpp                    | 55 +++++++++++++++----
 .../lib/Lower/OpenMP/DataSharingProcessor.cpp | 27 +++++++--
 flang/lib/Lower/OpenMP/DataSharingProcessor.h | 10 +++-
 5 files changed, 93 insertions(+), 17 deletions(-)
 create mode 100644 do_loop_with_local_and_local_init.f90

diff --git a/do_loop_with_local_and_local_init.f90 b/do_loop_with_local_and_local_init.f90
new file mode 100644
index 0000000000000..55642f7cb4024
--- /dev/null
+++ b/do_loop_with_local_and_local_init.f90
@@ -0,0 +1,15 @@
+! For testing try: `flang -fc1 -emit-hlfir -mmlir --openmp-enable-delayed-privatization-staging=true do_loop_with_local_and_local_init.f90 -o test.mlir`
+
+! TODO Will be added as a proper test later.
+subroutine omploop
+  implicit none
+  integer :: i, local_var, local_init_var
+
+  do concurrent (i=1:10) local(local_var) local_init(local_init_var)
+    if (i < 5) then
+      local_var = 42
+    else 
+      local_init_var = 84
+    end if
+  end do
+end subroutine
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index 1d1323642bf9c..81c220e29e164 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -348,6 +348,9 @@ class AbstractConverter {
   virtual Fortran::lower::SymbolBox
   lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0;
 
+  virtual Fortran::lower::SymbolBox
+  shallowLookupSymbol(const Fortran::semantics::Symbol &sym) = 0;
+
   /// Return the mlir::SymbolTable associated to the ModuleOp.
   /// Look-ups are faster using it than using module.lookup<>,
   /// but the module op should be queried in case of failure
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 7c217ce2f404c..669190360889d 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -12,6 +12,8 @@
 
 #include "flang/Lower/Bridge.h"
 
+#include "OpenMP/DataSharingProcessor.h"
+#include "OpenMP/Utils.h"
 #include "flang/Lower/Allocatable.h"
 #include "flang/Lower/CallInterface.h"
 #include "flang/Lower/Coarray.h"
@@ -1136,6 +1138,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     return name;
   }
 
+  /// Find the symbol in the inner-most level of the local map or return null.
+  Fortran::lower::SymbolBox
+  shallowLookupSymbol(const Fortran::semantics::Symbol &sym) override {
+    if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym))
+      return v;
+    return {};
+  }
+
 private:
   FirConverter() = delete;
   FirConverter(const FirConverter &) = delete;
@@ -1210,14 +1220,6 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     return {};
   }
 
-  /// Find the symbol in the inner-most level of the local map or return null.
-  Fortran::lower::SymbolBox
-  shallowLookupSymbol(const Fortran::semantics::Symbol &sym) {
-    if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym))
-      return v;
-    return {};
-  }
-
   /// Find the symbol in one level up of symbol map such as for host-association
   /// in OpenMP code or return null.
   Fortran::lower::SymbolBox
@@ -2014,12 +2016,29 @@ class FirConverter : public Fortran::lower::AbstractConverter {
 
   /// Create DO CONCURRENT construct symbol bindings and generate LOCAL_INIT
   /// assignments.
-  void handleLocalitySpecs(const IncrementLoopInfo &info) {
+  void handleLocalitySpecs(IncrementLoopInfo &info) {
     Fortran::semantics::SemanticsContext &semanticsContext =
         bridge.getSemanticsContext();
-    for (const Fortran::semantics::Symbol *sym : info.localSymList)
+    Fortran::lower::omp::DataSharingProcessor dsp(
+        *this, semanticsContext, getEval(),
+        /*useDelayedPrivatization=*/true, localSymbols);
+    mlir::omp::PrivateClauseOps privateClauseOps;
+
+    for (const Fortran::semantics::Symbol *sym : info.localSymList) {
+      if (enableDelayedPrivatizationStaging) {
+        dsp.doPrivatize(sym, &privateClauseOps);
+        continue;
+      }
+
       createHostAssociateVarClone(*sym, /*skipDefaultInit=*/false);
+    }
+
     for (const Fortran::semantics::Symbol *sym : info.localInitSymList) {
+      if (enableDelayedPrivatizationStaging) {
+        dsp.doPrivatize(sym, &privateClauseOps);
+        continue;
+      }
+
       createHostAssociateVarClone(*sym, /*skipDefaultInit=*/true);
       const auto *hostDetails =
           sym->detailsIf<Fortran::semantics::HostAssocDetails>();
@@ -2033,11 +2052,27 @@ class FirConverter : public Fortran::lower::AbstractConverter {
         assign.u = Fortran::evaluate::Assignment::BoundsSpec{};
       genAssignment(assign);
     }
+
     for (const Fortran::semantics::Symbol *sym : info.sharedSymList) {
       const auto *hostDetails =
           sym->detailsIf<Fortran::semantics::HostAssocDetails>();
       copySymbolBinding(hostDetails->symbol(), *sym);
     }
+
+    info.doLoop.getPrivateVarsMutable().assign(privateClauseOps.privateVars);
+    info.doLoop.setPrivateSymsAttr(
+        builder->getArrayAttr(privateClauseOps.privateSyms));
+
+    for (auto [sym, privateVar] : llvm::zip_equal(
+             dsp.getAllSymbolsToPrivatize(), privateClauseOps.privateVars)) {
+      auto arg = info.doLoop.getRegion().begin()->addArgument(
+          privateVar.getType(), info.doLoop.getLoc());
+      bindSymbol(*sym, hlfir::translateToExtendedValue(
+                           privateVar.getLoc(), *builder, hlfir::Entity{arg},
+                           /*contiguousHint=*/true)
+                           .first);
+    }
+
     // Note that allocatable, types with ultimate components, and type
     // requiring finalization are forbidden in LOCAL/LOCAL_INIT (F2023 C1130),
     // so no clean-up needs to be generated for these entities.
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index d13f101f516e7..26615cd97dd31 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -53,6 +53,15 @@ DataSharingProcessor::DataSharingProcessor(
   });
 }
 
+DataSharingProcessor::DataSharingProcessor(lower::AbstractConverter &converter,
+                                           semantics::SemanticsContext &semaCtx,
+                                           lower::pft::Evaluation &eval,
+                                           bool useDelayedPrivatization,
+                                           lower::SymMap &symTable)
+    : DataSharingProcessor(converter, semaCtx, {}, eval,
+                           /*shouldCollectPreDeterminedSymbols=*/false,
+                           useDelayedPrivatization, symTable) {}
+
 void DataSharingProcessor::processStep1(
     mlir::omp::PrivateClauseOps *clauseOps) {
   collectSymbolsForPrivatization();
@@ -498,20 +507,26 @@ void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) {
     }
 }
 
-void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
+void DataSharingProcessor::doPrivatize(const semantics::Symbol *symToPrivatize,
                                        mlir::omp::PrivateClauseOps *clauseOps) {
   if (!useDelayedPrivatization) {
-    cloneSymbol(sym);
-    copyFirstPrivateSymbol(sym);
+    cloneSymbol(symToPrivatize);
+    copyFirstPrivateSymbol(symToPrivatize);
     return;
   }
 
-  lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
+  const semantics::Symbol *sym = symToPrivatize->HasLocalLocality()
+                                     ? &symToPrivatize->GetUltimate()
+                                     : symToPrivatize;
+  lower::SymbolBox hsb = symToPrivatize->HasLocalLocality()
+                             ? converter.shallowLookupSymbol(*sym)
+                             : converter.lookupOneLevelUpSymbol(*sym);
   assert(hsb && "Host symbol box not found");
 
   mlir::Location symLoc = hsb.getAddr().getLoc();
   std::string privatizerName = sym->name().ToString() + ".privatizer";
-  bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate);
+  bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate) ||
+                        sym->test(semantics::Symbol::Flag::LocalityLocalInit);
 
   mlir::Value privVal = hsb.getAddr();
   mlir::Type allocType = privVal.getType();
@@ -638,6 +653,8 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
   }
 
   symToPrivatizer[sym] = privatizerOp;
+  if (symToPrivatize->HasLocalLocality())
+    allPrivatizedSymbols.insert(symToPrivatize);
 }
 
 } // namespace omp
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
index 54a42fd199831..f5fef9f6dfe85 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
@@ -105,8 +105,6 @@ class DataSharingProcessor {
   void collectImplicitSymbols();
   void collectPreDeterminedSymbols();
   void privatize(mlir::omp::PrivateClauseOps *clauseOps);
-  void doPrivatize(const semantics::Symbol *sym,
-                   mlir::omp::PrivateClauseOps *clauseOps);
   void copyLastPrivatize(mlir::Operation *op);
   void insertLastPrivateCompare(mlir::Operation *op);
   void cloneSymbol(const semantics::Symbol *sym);
@@ -125,6 +123,11 @@ class DataSharingProcessor {
                        bool shouldCollectPreDeterminedSymbols,
                        bool useDelayedPrivatization, lower::SymMap &symTable);
 
+  DataSharingProcessor(lower::AbstractConverter &converter,
+                       semantics::SemanticsContext &semaCtx,
+                       lower::pft::Evaluation &eval,
+                       bool useDelayedPrivatization, lower::SymMap &symTable);
+
   // Privatisation is split into two steps.
   // Step1 performs cloning of all privatisation clauses and copying for
   // firstprivates. Step1 is performed at the place where process/processStep1
@@ -151,6 +154,9 @@ class DataSharingProcessor {
                ? allPrivatizedSymbols.getArrayRef()
                : llvm::ArrayRef<const semantics::Symbol *>();
   }
+
+  void doPrivatize(const semantics::Symbol *sym,
+                   mlir::omp::PrivateClauseOps *clauseOps);
 };
 
 } // namespace omp



More information about the llvm-commits mailing list