[flang] [llvm] [WIP][PoC][flang] Re-use OpenMP data environment clauses for locality spec (PR #128148)
Kareem Ergawy via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 02:48:34 PST 2025
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/128148
>From 4dd5222c0392d4f95b5e8d6ccbbb082987bec210 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 19 Feb 2025 05:21:26 -0600
Subject: [PATCH 1/6] [flang] Extend `fir.do_loop` to reuse OpenMP clause
table-gen records
---
.../flang/Optimizer/Dialect/CMakeLists.txt | 4 ++--
flang/include/flang/Optimizer/Dialect/FIROps.td | 16 +++++++++++++---
flang/lib/Optimizer/Dialect/FIROps.cpp | 8 +++++---
3 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
index 73f388cbab6c9..da14fcd25a8d3 100644
--- a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
+++ b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
@@ -16,8 +16,8 @@ mlir_tablegen(FIRAttr.cpp.inc -gen-attrdef-defs)
set(LLVM_TARGET_DEFINITIONS FIROps.td)
mlir_tablegen(FIROps.h.inc -gen-op-decls)
mlir_tablegen(FIROps.cpp.inc -gen-op-defs)
-mlir_tablegen(FIROpsTypes.h.inc --gen-typedef-decls)
-mlir_tablegen(FIROpsTypes.cpp.inc --gen-typedef-defs)
+mlir_tablegen(FIROpsTypes.h.inc --gen-typedef-decls -typedefs-dialect=fir)
+mlir_tablegen(FIROpsTypes.cpp.inc --gen-typedef-defs -typedefs-dialect=fir)
add_public_tablegen_target(FIROpsIncGen)
set(LLVM_TARGET_DEFINITIONS FortranVariableInterface.td)
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 8dbc9df9f553d..eab1ebbf16fb5 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -16,6 +16,7 @@
include "mlir/Dialect/Arith/IR/ArithBase.td"
include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td"
+include "mlir/Dialect/OpenMP/OpenMPClauses.td"
include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td"
include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
include "flang/Optimizer/Dialect/FIRDialect.td"
@@ -2171,7 +2172,7 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
let hasVerifier = 1;
let hasCustomAssemblyFormat = 1;
- let arguments = (ins
+ defvar opArgs = (ins
Index:$lowerBound,
Index:$upperBound,
Index:$step,
@@ -2182,6 +2183,8 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
OptionalAttr<ArrayAttr>:$reduceAttrs,
OptionalAttr<LoopAnnotationAttr>:$loopAnnotation
);
+
+ let arguments = !con(opArgs, OpenMP_PrivateClause.arguments);
let results = (outs Variadic<AnyType>:$results);
let regions = (region SizedRegion<1>:$region);
@@ -2193,10 +2196,13 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
CArg<"mlir::ValueRange", "std::nullopt">:$iterArgs,
CArg<"mlir::ValueRange", "std::nullopt">:$reduceOperands,
CArg<"llvm::ArrayRef<mlir::Attribute>", "{}">:$reduceAttrs,
- CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>
+ CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes,
+ CArg<"mlir::ValueRange", "std::nullopt">:$private_vars,
+ CArg<"mlir::ArrayRef<mlir::Attribute>", "{}">:$private_syms
+ )>
];
- let extraClassDeclaration = [{
+ defvar opExtraClassDeclaration = [{
mlir::Value getInductionVar() { return getBody()->getArgument(0); }
mlir::OpBuilder getBodyBuilder() {
return mlir::OpBuilder(getBody(), std::prev(getBody()->end()));
@@ -2258,6 +2264,10 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
unsigned resultNum);
mlir::Value blockArgToSourceOp(unsigned blockArgNum);
}];
+
+ let extraClassDeclaration =
+ !strconcat(opExtraClassDeclaration, "\n",
+ OpenMP_PrivateClause.extraClassDeclaration);
}
def fir_IfOp : region_Op<"if", [DeclareOpInterfaceMethods<RegionBranchOpInterface, [
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 7e50622db08c9..3e04327d67a00 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -2478,14 +2478,16 @@ void fir::DoLoopOp::build(mlir::OpBuilder &builder,
bool finalCountValue, mlir::ValueRange iterArgs,
mlir::ValueRange reduceOperands,
llvm::ArrayRef<mlir::Attribute> reduceAttrs,
- llvm::ArrayRef<mlir::NamedAttribute> attributes) {
+ llvm::ArrayRef<mlir::NamedAttribute> attributes,
+ mlir::ValueRange privateVars,
+ mlir::ArrayRef<mlir::Attribute> privateSyms) {
result.addOperands({lb, ub, step});
result.addOperands(reduceOperands);
result.addOperands(iterArgs);
result.addAttribute(getOperandSegmentSizeAttr(),
builder.getDenseI32ArrayAttr(
{1, 1, 1, static_cast<int32_t>(reduceOperands.size()),
- static_cast<int32_t>(iterArgs.size())}));
+ static_cast<int32_t>(iterArgs.size()), 0}));
if (finalCountValue) {
result.addTypes(builder.getIndexType());
result.addAttribute(getFinalValueAttrName(result.name),
@@ -2591,7 +2593,7 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser,
result.addAttribute(getOperandSegmentSizeAttr(),
builder.getDenseI32ArrayAttr(
{1, 1, 1, static_cast<int32_t>(reduceOperands.size()),
- static_cast<int32_t>(iterOperands.size())}));
+ static_cast<int32_t>(iterOperands.size()), 0}));
if (parser.parseOptionalAttrDictWithKeyword(result.attributes))
return mlir::failure();
>From bb165a2f646e2887e0a6af1f915476c3cf87bf16 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 19 Feb 2025 05:57:57 -0600
Subject: [PATCH 2/6] [flang] Parsing and printing for `fir.do_loop` with
`private` specifiers
---
.../include/flang/Optimizer/Dialect/FIROps.td | 25 ++++-
flang/lib/Optimizer/Dialect/FIROps.cpp | 100 +++++++++++++++---
2 files changed, 106 insertions(+), 19 deletions(-)
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index eab1ebbf16fb5..03a10c03cc1b6 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -2203,20 +2203,37 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
];
defvar opExtraClassDeclaration = [{
- mlir::Value getInductionVar() { return getBody()->getArgument(0); }
mlir::OpBuilder getBodyBuilder() {
return mlir::OpBuilder(getBody(), std::prev(getBody()->end()));
}
+
+ /// Region argument accessors.
+ mlir::Value getInductionVar() { return getBody()->getArgument(0); }
mlir::Block::BlockArgListType getRegionIterArgs() {
- return getBody()->getArguments().drop_front();
+ // 1 for skipping the induction variable.
+ return getBody()->getArguments().slice(1, getNumIterOperands());
}
+ mlir::Block::BlockArgListType getRegionPrivateArgs() {
+ return getBody()->getArguments().slice(1 + getNumIterOperands(),
+ numPrivateBlockArgs());
+ }
+
+ /// Operation operand accessors.
mlir::Operation::operand_range getIterOperands() {
return getOperands()
- .drop_front(getNumControlOperands() + getNumReduceOperands());
+ .slice(getNumControlOperands() + getNumReduceOperands(),
+ getNumIterOperands());
}
llvm::MutableArrayRef<mlir::OpOperand> getInitsMutable() {
return getOperation()->getOpOperands()
- .drop_front(getNumControlOperands() + getNumReduceOperands());
+ .slice(getNumControlOperands() + getNumReduceOperands(),
+ getNumIterOperands());
+ }
+ mlir::Operation::operand_range getPrivateOperands() {
+ return getOperands()
+ .slice(getNumControlOperands() + getNumReduceOperands()
+ + getNumIterOperands(),
+ numPrivateBlockArgs());
}
void setLowerBound(mlir::Value bound) { (*this)->setOperand(0, bound); }
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 3e04327d67a00..c729414cd2393 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -2563,8 +2563,9 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser,
// Parse the optional initial iteration arguments.
llvm::SmallVector<mlir::OpAsmParser::Argument> regionArgs;
- llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> iterOperands;
llvm::SmallVector<mlir::Type> argTypes;
+
+ llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> iterOperands;
bool prependCount = false;
regionArgs.push_back(inductionVariable);
@@ -2589,15 +2590,6 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser,
prependCount = true;
}
- // Set the operandSegmentSizes attribute
- result.addAttribute(getOperandSegmentSizeAttr(),
- builder.getDenseI32ArrayAttr(
- {1, 1, 1, static_cast<int32_t>(reduceOperands.size()),
- static_cast<int32_t>(iterOperands.size()), 0}));
-
- if (parser.parseOptionalAttrDictWithKeyword(result.attributes))
- return mlir::failure();
-
// Induction variable.
if (prependCount)
result.addAttribute(DoLoopOp::getFinalValueAttrName(result.name),
@@ -2606,15 +2598,77 @@ mlir::ParseResult fir::DoLoopOp::parse(mlir::OpAsmParser &parser,
argTypes.push_back(indexType);
// Loop carried variables
argTypes.append(result.types.begin(), result.types.end());
- // Parse the body region.
- auto *body = result.addRegion();
+
if (regionArgs.size() != argTypes.size())
return parser.emitError(
parser.getNameLoc(),
"mismatch in number of loop-carried values and defined values");
+
+ llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> privateOperands;
+ if (succeeded(parser.parseOptionalKeyword("private"))) {
+ std::size_t oldArgTypesSize = argTypes.size();
+ if (failed(parser.parseLParen()))
+ return mlir::failure();
+
+ llvm::SmallVector<mlir::SymbolRefAttr> privateSymbolVec;
+ if (failed(parser.parseCommaSeparatedList([&]() {
+ if (failed(parser.parseAttribute(privateSymbolVec.emplace_back())))
+ return mlir::failure();
+
+ if (parser.parseOperand(privateOperands.emplace_back()) ||
+ parser.parseArrow() ||
+ parser.parseArgument(regionArgs.emplace_back()))
+ return mlir::failure();
+
+ return mlir::success();
+ })))
+ return mlir::failure();
+
+ if (failed(parser.parseColon()))
+ return mlir::failure();
+
+ if (failed(parser.parseCommaSeparatedList([&]() {
+ if (failed(parser.parseType(argTypes.emplace_back())))
+ return mlir::failure();
+
+ return mlir::success();
+ })))
+ return mlir::failure();
+
+ if (regionArgs.size() != argTypes.size())
+ return parser.emitError(parser.getNameLoc(),
+ "mismatch in number of private arg and types");
+
+ if (failed(parser.parseRParen()))
+ return mlir::failure();
+
+ for (auto operandType : llvm::zip_equal(
+ privateOperands, llvm::drop_begin(argTypes, oldArgTypesSize)))
+ if (parser.resolveOperand(std::get<0>(operandType),
+ std::get<1>(operandType), result.operands))
+ return mlir::failure();
+
+ llvm::SmallVector<mlir::Attribute> symbolAttrs(privateSymbolVec.begin(),
+ privateSymbolVec.end());
+ result.addAttribute(getPrivateSymsAttrName(result.name),
+ builder.getArrayAttr(symbolAttrs));
+ }
+
+ if (parser.parseOptionalAttrDictWithKeyword(result.attributes))
+ return mlir::failure();
+
+ // Set the operandSegmentSizes attribute
+ result.addAttribute(getOperandSegmentSizeAttr(),
+ builder.getDenseI32ArrayAttr(
+ {1, 1, 1, static_cast<int32_t>(reduceOperands.size()),
+ static_cast<int32_t>(iterOperands.size()),
+ static_cast<int32_t>(privateOperands.size())}));
+
for (size_t i = 0, e = regionArgs.size(); i != e; ++i)
regionArgs[i].type = argTypes[i];
+ // Parse the body region.
+ auto *body = result.addRegion();
if (parser.parseRegion(*body, regionArgs))
return mlir::failure();
@@ -2708,9 +2762,25 @@ void fir::DoLoopOp::print(mlir::OpAsmPrinter &p) {
p << " -> " << getResultTypes();
printBlockTerminators = true;
}
- p.printOptionalAttrDictWithKeyword(
- (*this)->getAttrs(),
- {"unordered", "finalValue", "reduceAttrs", "operandSegmentSizes"});
+
+ if (numPrivateBlockArgs() > 0) {
+ p << " private(";
+ llvm::interleaveComma(llvm::zip_equal(getPrivateSymsAttr(),
+ getPrivateVars(),
+ getRegionPrivateArgs()),
+ p, [&](auto it) {
+ p << std::get<0>(it) << " " << std::get<1>(it)
+ << " -> " << std::get<2>(it);
+ });
+ p << " : ";
+ llvm::interleaveComma(getPrivateVars(), p,
+ [&](auto it) { p << it.getType(); });
+ p << ")";
+ }
+
+ p.printOptionalAttrDictWithKeyword((*this)->getAttrs(),
+ {"unordered", "finalValue", "reduceAttrs",
+ "operandSegmentSizes", "private_syms"});
p << ' ';
p.printRegion(getRegion(), /*printEntryBlockArgs=*/false,
printBlockTerminators);
>From c8cf5a644886bb8dd3ad19be6e3b916ffcbd222c Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Thu, 20 Feb 2025 03:25:32 -0600
Subject: [PATCH 3/6] [flang] Basic lowering of `fir.do_loop` locality
specifiers
---
.../include/flang/Optimizer/Dialect/FIROps.td | 8 +---
.../Transforms/ControlFlowConverter.cpp | 37 +++++++++++++++++++
2 files changed, 38 insertions(+), 7 deletions(-)
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 03a10c03cc1b6..34647263d6cc7 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -2229,12 +2229,6 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
.slice(getNumControlOperands() + getNumReduceOperands(),
getNumIterOperands());
}
- mlir::Operation::operand_range getPrivateOperands() {
- return getOperands()
- .slice(getNumControlOperands() + getNumReduceOperands()
- + getNumIterOperands(),
- numPrivateBlockArgs());
- }
void setLowerBound(mlir::Value bound) { (*this)->setOperand(0, bound); }
void setUpperBound(mlir::Value bound) { (*this)->setOperand(1, bound); }
@@ -2242,7 +2236,7 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments,
/// Number of region arguments for loop-carried values
unsigned getNumRegionIterArgs() {
- return getBody()->getNumArguments() - 1;
+ return getNumIterOperands();
}
/// Number of operands controlling the loop: lb, ub, step
unsigned getNumControlOperands() { return 3; }
diff --git a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
index b09bbf6106dbb..d3eaf963f3667 100644
--- a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
+++ b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
@@ -32,6 +32,19 @@ using namespace fir;
using namespace mlir;
namespace {
+/// Looks up the `omp::PrivateClauseOp` named `symbolName`, starting the
+/// search at the operation `from`, and returns it.
+///
+/// TODO Copied from OpenMPToLLVMIRTranslation.cpp, move to a shared location.
+/// Maybe a static function on the `PrivateClauseOp`.
+static omp::PrivateClauseOp findPrivatizer(Operation *from,
+ SymbolRefAttr symbolName) {
+ omp::PrivateClauseOp privatizer =
+ SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
+ symbolName);
+ assert(privatizer && "privatizer not found in the symbol table");
+ return privatizer;
+}
// Conversion of fir control ops to more primitive control-flow.
//
@@ -57,6 +70,30 @@ class CfgLoopConv : public mlir::OpRewritePattern<fir::DoLoopOp> {
auto iofAttr = mlir::arith::IntegerOverflowFlagsAttr::get(
rewriter.getContext(), flags);
+ // Handle privatization
+ if (!loop.getPrivateVars().empty()) {
+ mlir::OpBuilder::InsertionGuard guard(rewriter);
+ rewriter.setInsertionPointToStart(&loop.getRegion().front());
+
+ std::optional<ArrayAttr> privateSyms = loop.getPrivateSyms();
+
+ for (auto [privateVar, privateArg, privatizerSym] :
+ llvm::zip_equal(loop.getPrivateVars(), loop.getRegionPrivateArgs(),
+ *privateSyms)) {
+ SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privatizerSym);
+ omp::PrivateClauseOp privatizer = findPrivatizer(loop, privatizerName);
+
+ mlir::Value localAlloc =
+ rewriter.create<fir::AllocaOp>(loop.getLoc(), privatizer.getType());
+ rewriter.replaceAllUsesWith(privateArg, localAlloc);
+ }
+
+ loop.getRegion().front().eraseArguments(1 + loop.getNumRegionIterArgs(),
+ loop.numPrivateBlockArgs());
+ loop.getPrivateVarsMutable().clear();
+ loop.setPrivateSymsAttr(nullptr);
+ }
+
// Create the start and end blocks that will wrap the DoLoopOp with an
// initalizer and an end point
auto *initBlock = rewriter.getInsertionBlock();
>From ab60385e385f69e39e49efbd09a9cc82a67d6d9a Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Thu, 20 Feb 2025 05:21:10 -0600
Subject: [PATCH 4/6] [flang] Basic lowering of `fir.do_loop`'s `local_init`
specifier
---
.../Transforms/ControlFlowConverter.cpp | 22 ++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
index d3eaf963f3667..88779e6ebd977 100644
--- a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
+++ b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
@@ -74,7 +74,6 @@ class CfgLoopConv : public mlir::OpRewritePattern<fir::DoLoopOp> {
if (!loop.getPrivateVars().empty()) {
mlir::OpBuilder::InsertionGuard guard(rewriter);
rewriter.setInsertionPointToStart(&loop.getRegion().front());
-
std::optional<ArrayAttr> privateSyms = loop.getPrivateSyms();
for (auto [privateVar, privateArg, privatizerSym] :
@@ -85,6 +84,27 @@ class CfgLoopConv : public mlir::OpRewritePattern<fir::DoLoopOp> {
mlir::Value localAlloc =
rewriter.create<fir::AllocaOp>(loop.getLoc(), privatizer.getType());
+
+ if (privatizer.getDataSharingType() ==
+ omp::DataSharingClauseType::FirstPrivate) {
+ mlir::Block *beforeLocalInit = rewriter.getInsertionBlock();
+ mlir::Block *afterLocalInit = rewriter.splitBlock(
+ rewriter.getInsertionBlock(), rewriter.getInsertionPoint());
+ rewriter.cloneRegionBefore(privatizer.getCopyRegion(),
+ afterLocalInit);
+ mlir::Block* copyRegionFront = beforeLocalInit->getNextNode();
+ mlir::Block* copyRegionBack = afterLocalInit->getPrevNode();
+
+ rewriter.setInsertionPoint(beforeLocalInit, beforeLocalInit->end());
+ rewriter.create<mlir::cf::BranchOp>(
+ loc, copyRegionFront,
+ llvm::SmallVector<mlir::Value>{privateVar, privateArg});
+
+ rewriter.eraseOp(copyRegionBack->getTerminator());
+ rewriter.setInsertionPoint(copyRegionBack, copyRegionBack->end());
+ rewriter.create<mlir::cf::BranchOp>(loc, afterLocalInit);
+ }
+
rewriter.replaceAllUsesWith(privateArg, localAlloc);
}
>From b898f1dcb2c25076c310974596d0b90e1e02370a Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Fri, 21 Feb 2025 02:20:54 -0600
Subject: [PATCH 5/6] Add sample test
---
do_loop_with_local_and_local_init.mlir | 49 ++++++++++++++++++++++++++
1 file changed, 49 insertions(+)
create mode 100644 do_loop_with_local_and_local_init.mlir
diff --git a/do_loop_with_local_and_local_init.mlir b/do_loop_with_local_and_local_init.mlir
new file mode 100644
index 0000000000000..06510b4433f1a
--- /dev/null
+++ b/do_loop_with_local_and_local_init.mlir
@@ -0,0 +1,49 @@
+// For testing:
+// 1. parsing/printing (roundtripping): `fir-opt do_loop_with_local_and_local_init.mlir -o roundtrip.mlir`
+// 2. Lowering locality specs during CFG: `fir-opt --cfg-conversion do_loop_with_local_and_local_init.mlir -o after_cfg_lowering.mlir`
+
+// TODO I will add both of the above steps as proper tests when the PoC is complete.
+module attributes {dlti.dl_spec = #dlti.dl_spec<i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 21.0.0 (/home/kaergawy/git/aomp20.0/llvm-project/flang c8cf5a644886bb8dd3ad19be6e3b916ffcbd222c)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+
+ omp.private {type = private} @local_privatizer : i32
+
+ omp.private {type = firstprivate} @local_init_privatizer : i32 copy {
+ ^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<i32>):
+ %0 = fir.load %arg0 : !fir.ref<i32>
+ fir.store %0 to %arg1 : !fir.ref<i32>
+ omp.yield(%arg1 : !fir.ref<i32>)
+ }
+
+ func.func @_QPomploop() {
+ %0 = fir.alloca i32 {bindc_name = "i"}
+ %1:2 = hlfir.declare %0 {uniq_name = "_QFomploopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %2 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomploopEi"}
+ %3:2 = hlfir.declare %2 {uniq_name = "_QFomploopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %4 = fir.alloca i32 {bindc_name = "local_init_var", uniq_name = "_QFomploopElocal_init_var"}
+ %5:2 = hlfir.declare %4 {uniq_name = "_QFomploopElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %6 = fir.alloca i32 {bindc_name = "local_var", uniq_name = "_QFomploopElocal_var"}
+ %7:2 = hlfir.declare %6 {uniq_name = "_QFomploopElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %c1_i32 = arith.constant 1 : i32
+ %8 = fir.convert %c1_i32 : (i32) -> index
+ %c10_i32 = arith.constant 10 : i32
+ %9 = fir.convert %c10_i32 : (i32) -> index
+ %c1 = arith.constant 1 : index
+ fir.do_loop %arg0 = %8 to %9 step %c1 unordered private(@local_privatizer %7#0 -> %arg1, @local_init_privatizer %5#0 -> %arg2 : !fir.ref<i32>, !fir.ref<i32>) {
+ %10 = fir.convert %arg0 : (index) -> i32
+ fir.store %10 to %1#1 : !fir.ref<i32>
+ %12:2 = hlfir.declare %arg1 {uniq_name = "_QFomploopElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %14:2 = hlfir.declare %arg2 {uniq_name = "_QFomploopElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %16 = fir.load %1#0 : !fir.ref<i32>
+ %c5_i32 = arith.constant 5 : i32
+ %17 = arith.cmpi slt, %16, %c5_i32 : i32
+ fir.if %17 {
+ %c42_i32 = arith.constant 42 : i32
+ hlfir.assign %c42_i32 to %12#0 : i32, !fir.ref<i32>
+ } else {
+ %c84_i32 = arith.constant 84 : i32
+ hlfir.assign %c84_i32 to %14#0 : i32, !fir.ref<i32>
+ }
+ }
+ return
+ }
+}
>From fc0a6385b31cb8038cd9a7ffe5a6d5715891cdad Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Thu, 20 Feb 2025 08:07:58 -0600
Subject: [PATCH 6/6] [flang] Basic PFT to MLIR lowering for `do concurrent`
locality specifiers
---
do_loop_with_local_and_local_init.f90 | 15 +++++
flang/include/flang/Lower/AbstractConverter.h | 3 +
flang/lib/Lower/Bridge.cpp | 55 +++++++++++++++----
.../lib/Lower/OpenMP/DataSharingProcessor.cpp | 27 +++++++--
flang/lib/Lower/OpenMP/DataSharingProcessor.h | 10 +++-
5 files changed, 93 insertions(+), 17 deletions(-)
create mode 100644 do_loop_with_local_and_local_init.f90
diff --git a/do_loop_with_local_and_local_init.f90 b/do_loop_with_local_and_local_init.f90
new file mode 100644
index 0000000000000..55642f7cb4024
--- /dev/null
+++ b/do_loop_with_local_and_local_init.f90
@@ -0,0 +1,15 @@
+! For testing try: `flang -fc1 -emit-hlfir -mmlir --openmp-enable-delayed-privatization-staging=true do_loop_with_local_and_local_init.f90 -o test.mlir`
+
+! TODO Will be added as proper test later.
+subroutine omploop
+ implicit none
+ integer :: i, local_var, local_init_var
+
+ do concurrent (i=1:10) local(local_var) local_init(local_init_var)
+ if (i < 5) then
+ local_var = 42
+ else
+ local_init_var = 84
+ end if
+ end do
+end subroutine
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index 1d1323642bf9c..81c220e29e164 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -348,6 +348,9 @@ class AbstractConverter {
virtual Fortran::lower::SymbolBox
lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0;
+ virtual Fortran::lower::SymbolBox
+ shallowLookupSymbol(const Fortran::semantics::Symbol &sym) = 0;
+
/// Return the mlir::SymbolTable associated to the ModuleOp.
/// Look-ups are faster using it than using module.lookup<>,
/// but the module op should be queried in case of failure
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 7c217ce2f404c..669190360889d 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -12,6 +12,8 @@
#include "flang/Lower/Bridge.h"
+#include "OpenMP/DataSharingProcessor.h"
+#include "OpenMP/Utils.h"
#include "flang/Lower/Allocatable.h"
#include "flang/Lower/CallInterface.h"
#include "flang/Lower/Coarray.h"
@@ -1136,6 +1138,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
return name;
}
+ /// Find the symbol in the inner-most level of the local map or return null.
+ Fortran::lower::SymbolBox
+ shallowLookupSymbol(const Fortran::semantics::Symbol &sym) override {
+ if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym))
+ return v;
+ return {};
+ }
+
private:
FirConverter() = delete;
FirConverter(const FirConverter &) = delete;
@@ -1210,14 +1220,6 @@ class FirConverter : public Fortran::lower::AbstractConverter {
return {};
}
- /// Find the symbol in the inner-most level of the local map or return null.
- Fortran::lower::SymbolBox
- shallowLookupSymbol(const Fortran::semantics::Symbol &sym) {
- if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym))
- return v;
- return {};
- }
-
/// Find the symbol in one level up of symbol map such as for host-association
/// in OpenMP code or return null.
Fortran::lower::SymbolBox
@@ -2014,12 +2016,29 @@ class FirConverter : public Fortran::lower::AbstractConverter {
/// Create DO CONCURRENT construct symbol bindings and generate LOCAL_INIT
/// assignments.
- void handleLocalitySpecs(const IncrementLoopInfo &info) {
+ void handleLocalitySpecs(IncrementLoopInfo &info) {
Fortran::semantics::SemanticsContext &semanticsContext =
bridge.getSemanticsContext();
- for (const Fortran::semantics::Symbol *sym : info.localSymList)
+ Fortran::lower::omp::DataSharingProcessor dsp(
+ *this, semanticsContext, getEval(),
+ /*useDelayedPrivatization=*/true, localSymbols);
+ mlir::omp::PrivateClauseOps privateClauseOps;
+
+ for (const Fortran::semantics::Symbol *sym : info.localSymList) {
+ if (enableDelayedPrivatizationStaging) {
+ dsp.doPrivatize(sym, &privateClauseOps);
+ continue;
+ }
+
createHostAssociateVarClone(*sym, /*skipDefaultInit=*/false);
+ }
+
for (const Fortran::semantics::Symbol *sym : info.localInitSymList) {
+ if (enableDelayedPrivatizationStaging) {
+ dsp.doPrivatize(sym, &privateClauseOps);
+ continue;
+ }
+
createHostAssociateVarClone(*sym, /*skipDefaultInit=*/true);
const auto *hostDetails =
sym->detailsIf<Fortran::semantics::HostAssocDetails>();
@@ -2033,11 +2052,27 @@ class FirConverter : public Fortran::lower::AbstractConverter {
assign.u = Fortran::evaluate::Assignment::BoundsSpec{};
genAssignment(assign);
}
+
for (const Fortran::semantics::Symbol *sym : info.sharedSymList) {
const auto *hostDetails =
sym->detailsIf<Fortran::semantics::HostAssocDetails>();
copySymbolBinding(hostDetails->symbol(), *sym);
}
+
+ info.doLoop.getPrivateVarsMutable().assign(privateClauseOps.privateVars);
+ info.doLoop.setPrivateSymsAttr(
+ builder->getArrayAttr(privateClauseOps.privateSyms));
+
+ for (auto [sym, privateVar] : llvm::zip_equal(
+ dsp.getAllSymbolsToPrivatize(), privateClauseOps.privateVars)) {
+ auto arg = info.doLoop.getRegion().begin()->addArgument(
+ privateVar.getType(), info.doLoop.getLoc());
+ bindSymbol(*sym, hlfir::translateToExtendedValue(
+ privateVar.getLoc(), *builder, hlfir::Entity{arg},
+ /*contiguousHint=*/true)
+ .first);
+ }
+
// Note that allocatable, types with ultimate components, and type
// requiring finalization are forbidden in LOCAL/LOCAL_INIT (F2023 C1130),
// so no clean-up needs to be generated for these entities.
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index d13f101f516e7..26615cd97dd31 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -53,6 +53,15 @@ DataSharingProcessor::DataSharingProcessor(
});
}
+DataSharingProcessor::DataSharingProcessor(lower::AbstractConverter &converter,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ bool useDelayedPrivatization,
+ lower::SymMap &symTable)
+ : DataSharingProcessor(converter, semaCtx, {}, eval,
+ /*shouldCollectPreDeterminedSymols=*/false,
+ useDelayedPrivatization, symTable) {}
+
void DataSharingProcessor::processStep1(
mlir::omp::PrivateClauseOps *clauseOps) {
collectSymbolsForPrivatization();
@@ -498,20 +507,26 @@ void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) {
}
}
-void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
+void DataSharingProcessor::doPrivatize(const semantics::Symbol *symToPrivatize,
mlir::omp::PrivateClauseOps *clauseOps) {
if (!useDelayedPrivatization) {
- cloneSymbol(sym);
- copyFirstPrivateSymbol(sym);
+ cloneSymbol(symToPrivatize);
+ copyFirstPrivateSymbol(symToPrivatize);
return;
}
- lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
+ const semantics::Symbol *sym = symToPrivatize->HasLocalLocality()
+ ? &symToPrivatize->GetUltimate()
+ : symToPrivatize;
+ lower::SymbolBox hsb = symToPrivatize->HasLocalLocality()
+ ? converter.shallowLookupSymbol(*sym)
+ : converter.lookupOneLevelUpSymbol(*sym);
assert(hsb && "Host symbol box not found");
mlir::Location symLoc = hsb.getAddr().getLoc();
std::string privatizerName = sym->name().ToString() + ".privatizer";
- bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate);
+ bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate) ||
+ sym->test(semantics::Symbol::Flag::LocalityLocalInit);
mlir::Value privVal = hsb.getAddr();
mlir::Type allocType = privVal.getType();
@@ -638,6 +653,8 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
}
symToPrivatizer[sym] = privatizerOp;
+ if (symToPrivatize->HasLocalLocality())
+ allPrivatizedSymbols.insert(symToPrivatize);
}
} // namespace omp
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
index 54a42fd199831..f5fef9f6dfe85 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
@@ -105,8 +105,6 @@ class DataSharingProcessor {
void collectImplicitSymbols();
void collectPreDeterminedSymbols();
void privatize(mlir::omp::PrivateClauseOps *clauseOps);
- void doPrivatize(const semantics::Symbol *sym,
- mlir::omp::PrivateClauseOps *clauseOps);
void copyLastPrivatize(mlir::Operation *op);
void insertLastPrivateCompare(mlir::Operation *op);
void cloneSymbol(const semantics::Symbol *sym);
@@ -125,6 +123,11 @@ class DataSharingProcessor {
bool shouldCollectPreDeterminedSymbols,
bool useDelayedPrivatization, lower::SymMap &symTable);
+ DataSharingProcessor(lower::AbstractConverter &converter,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ bool useDelayedPrivatization, lower::SymMap &symTable);
+
// Privatisation is split into two steps.
// Step1 performs cloning of all privatisation clauses and copying for
// firstprivates. Step1 is performed at the place where process/processStep1
@@ -151,6 +154,9 @@ class DataSharingProcessor {
? allPrivatizedSymbols.getArrayRef()
: llvm::ArrayRef<const semantics::Symbol *>();
}
+
+ void doPrivatize(const semantics::Symbol *sym,
+ mlir::omp::PrivateClauseOps *clauseOps);
};
} // namespace omp
More information about the llvm-commits
mailing list