[Mlir-commits] [flang] [mlir] [WIP] Delayed privatization. (PR #79862)
Kareem Ergawy
llvmlistbot at llvm.org
Wed Feb 7 06:25:30 PST 2024
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/79862
>From 7db8eb6385c2bbd0a959c272b22c67d37e545886 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 29 Jan 2024 04:45:18 -0600
Subject: [PATCH 1/2] [WIP] Delayed privatization.
This is a PoC for delayed privatization in OpenMP. Instead of directly
emitting privatization code in the frontend, we add a new op to outline
the privatization logic for a symbol and call-like mapping that maps
from the host symbol to an outlined function-like privatizer op.
Later, we would inline the delayed privatizer function-like op in the
OpenMP region to basically get the same code generated directly by the
frontend at the moment.
---
flang/include/flang/Lower/AbstractConverter.h | 4 +
flang/lib/Lower/Bridge.cpp | 2 +-
flang/lib/Lower/OpenMP.cpp | 314 ++++++++++++++----
.../OpenMP/FIR/delayed_privatization.f90 | 182 ++++++++++
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 43 ++-
.../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp | 23 +-
.../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 4 +-
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 93 +++++-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 106 +++++-
mlir/test/Dialect/OpenMP/ops.mlir | 10 +-
mlir/test/Dialect/OpenMP/roundtrip.mlir | 36 ++
11 files changed, 736 insertions(+), 81 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/FIR/delayed_privatization.f90
create mode 100644 mlir/test/Dialect/OpenMP/roundtrip.mlir
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index 796933a4eb5f68..55bc33e76e5f6e 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -16,6 +16,7 @@
#include "flang/Common/Fortran.h"
#include "flang/Lower/LoweringOptions.h"
#include "flang/Lower/PFTDefs.h"
+#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Semantics/symbol.h"
#include "mlir/IR/Builders.h"
@@ -296,6 +297,9 @@ class AbstractConverter {
return loweringOptions;
}
+ virtual Fortran::lower::SymbolBox
+ lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0;
+
private:
/// Options controlling lowering behavior.
const Fortran::lower::LoweringOptions &loweringOptions;
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 579f94ba756841..7a0804d57ff3ad 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1070,7 +1070,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
/// Find the symbol in one level up of symbol map such as for host-association
/// in OpenMP code or return null.
Fortran::lower::SymbolBox
- lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) {
+ lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) override {
if (Fortran::lower::SymbolBox v = localSymbols.lookupOneLevelUpSymbol(sym))
return v;
return {};
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 0a68aba162618b..81160086b1e835 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -32,6 +32,7 @@
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Support/CommandLine.h"
@@ -40,6 +41,12 @@ static llvm::cl::opt<bool> treatIndexAsSection(
llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."),
llvm::cl::init(true));
+static llvm::cl::opt<bool> enableDelayedPrivatization(
+ "openmp-enable-delayed-privatization",
+ llvm::cl::desc(
+ "Emit `[first]private` variables as clauses on the MLIR ops."),
+ llvm::cl::init(false));
+
using DeclareTargetCapturePair =
std::pair<mlir::omp::DeclareTargetCaptureClause,
Fortran::semantics::Symbol>;
@@ -147,6 +154,14 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
//===----------------------------------------------------------------------===//
class DataSharingProcessor {
+public:
+ struct DelayedPrivatizationInfo {
+ llvm::SetVector<mlir::SymbolRefAttr> privatizers;
+ llvm::SetVector<mlir::Value> hostAddresses;
+ llvm::SetVector<const Fortran::semantics::Symbol *> hostSymbols;
+ };
+
+private:
bool hasLastPrivateOp;
mlir::OpBuilder::InsertPoint lastPrivIP;
mlir::OpBuilder::InsertPoint insPt;
@@ -161,6 +176,12 @@ class DataSharingProcessor {
const Fortran::parser::OmpClauseList &opClauseList;
Fortran::lower::pft::Evaluation &eval;
+ bool useDelayedPrivatization;
+ llvm::SetVector<mlir::StringRef> existingPrivatizerNames;
+ Fortran::lower::SymMap *symTable;
+
+ DelayedPrivatizationInfo delayedPrivatizationInfo;
+
bool needBarrier();
void collectSymbols(Fortran::semantics::Symbol::Flag flag);
void collectOmpObjectListSymbol(
@@ -171,6 +192,8 @@ class DataSharingProcessor {
void collectDefaultSymbols();
void privatize();
void defaultPrivatize();
+ void doPrivatize(const Fortran::semantics::Symbol *sym);
+
void copyLastPrivatize(mlir::Operation *op);
void insertLastPrivateCompare(mlir::Operation *op);
void cloneSymbol(const Fortran::semantics::Symbol *sym);
@@ -182,10 +205,20 @@ class DataSharingProcessor {
public:
DataSharingProcessor(Fortran::lower::AbstractConverter &converter,
const Fortran::parser::OmpClauseList &opClauseList,
- Fortran::lower::pft::Evaluation &eval)
+ Fortran::lower::pft::Evaluation &eval,
+ bool useDelayedPrivatization = false,
+ Fortran::lower::SymMap *symTable = nullptr)
: hasLastPrivateOp(false), converter(converter),
firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList),
- eval(eval) {}
+ eval(eval), useDelayedPrivatization(useDelayedPrivatization),
+ symTable(symTable) {
+ for (auto privateOp : converter.getModuleOp()
+ .getRegion()
+ .getOps<mlir::omp::PrivateClauseOp>()) {
+ existingPrivatizerNames.insert(privateOp.getSymName());
+ }
+ }
+
// Privatisation is split into two steps.
// Step1 performs cloning of all privatisation clauses and copying for
// firstprivates. Step1 is performed at the place where process/processStep1
@@ -204,6 +237,10 @@ class DataSharingProcessor {
assert(!loopIV && "Loop iteration variable already set");
loopIV = iv;
}
+
+ const DelayedPrivatizationInfo &getDelayedPrivatizationInfo() const {
+ return delayedPrivatizationInfo;
+ }
};
void DataSharingProcessor::processStep1() {
@@ -488,16 +525,15 @@ void DataSharingProcessor::collectDefaultSymbols() {
}
void DataSharingProcessor::privatize() {
+
for (const Fortran::semantics::Symbol *sym : privatizedSymbols) {
if (const auto *commonDet =
sym->detailsIf<Fortran::semantics::CommonBlockDetails>()) {
for (const auto &mem : commonDet->objects()) {
- cloneSymbol(&*mem);
- copyFirstPrivateSymbol(&*mem);
+ doPrivatize(&*mem);
}
} else {
- cloneSymbol(sym);
- copyFirstPrivateSymbol(sym);
+ doPrivatize(sym);
}
}
}
@@ -523,12 +559,66 @@ void DataSharingProcessor::defaultPrivatize() {
!symbolsInNestedRegions.contains(sym) &&
!symbolsInParentRegions.contains(sym) &&
!privatizedSymbols.contains(sym)) {
- cloneSymbol(sym);
- copyFirstPrivateSymbol(sym);
+ doPrivatize(sym);
}
}
}
+void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
+ if (useDelayedPrivatization) {
+ auto ip = firOpBuilder.saveInsertionPoint();
+
+ auto moduleOp = firOpBuilder.getInsertionBlock()
+ ->getParentOp()
+ ->getParentOfType<mlir::ModuleOp>();
+
+ firOpBuilder.setInsertionPoint(&moduleOp.getBodyRegion().front(),
+ moduleOp.getBodyRegion().front().end());
+
+ Fortran::lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
+ assert(hsb && "Host symbol box not found");
+
+ mlir::Type symType = hsb.getAddr().getType();
+ mlir::Location symLoc = hsb.getAddr().getLoc();
+ std::string privatizerName = sym->name().ToString() + ".privatizer";
+
+ unsigned uniquingCounter = 0;
+ auto uniquePrivatizerName = mlir::SymbolTable::generateSymbolName<64>(
+ privatizerName,
+ [&](auto &suggestedName) {
+ return existingPrivatizerNames.count(suggestedName);
+ },
+ uniquingCounter);
+
+ auto privatizerOp = firOpBuilder.create<mlir::omp::PrivateClauseOp>(
+ symLoc, symType, uniquePrivatizerName);
+ firOpBuilder.setInsertionPointToEnd(&privatizerOp.getBody().front());
+
+ symTable->pushScope();
+ symTable->addSymbol(*sym, privatizerOp.getArgument(0));
+ symTable->pushScope();
+
+ cloneSymbol(sym);
+ copyFirstPrivateSymbol(sym);
+
+ firOpBuilder.create<mlir::omp::YieldOp>(
+ hsb.getAddr().getLoc(), symTable->shallowLookupSymbol(*sym).getAddr());
+
+ symTable->popScope();
+ symTable->popScope();
+ firOpBuilder.restoreInsertionPoint(ip);
+
+ delayedPrivatizationInfo.privatizers.insert(
+ mlir::SymbolRefAttr::get(privatizerOp));
+ delayedPrivatizationInfo.hostAddresses.insert(hsb.getAddr());
+ delayedPrivatizationInfo.hostSymbols.insert(sym);
+ existingPrivatizerNames.insert(uniquePrivatizerName);
+ } else {
+ cloneSymbol(sym);
+ copyFirstPrivateSymbol(sym);
+ }
+}
+
//===----------------------------------------------------------------------===//
// ClauseProcessor
//===----------------------------------------------------------------------===//
@@ -2267,7 +2357,9 @@ static void createBodyOfOp(
Op &op, Fortran::lower::AbstractConverter &converter, mlir::Location &loc,
Fortran::lower::pft::Evaluation &eval, bool genNested,
const Fortran::parser::OmpClauseList *clauses = nullptr,
- const llvm::SmallVector<const Fortran::semantics::Symbol *> &args = {},
+ std::function<llvm::SmallVector<const Fortran::semantics::Symbol *>(
+ mlir::Operation *)>
+ genRegionEntryCB = nullptr,
bool outerCombined = false, DataSharingProcessor *dsp = nullptr) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
@@ -2281,27 +2373,15 @@ static void createBodyOfOp(
// argument. Also update the symbol's address with the mlir argument value.
// e.g. For loops the argument is the induction variable. And all further
// uses of the induction variable should use this mlir value.
- if (args.size()) {
- std::size_t loopVarTypeSize = 0;
- for (const Fortran::semantics::Symbol *arg : args)
- loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size());
- mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
- llvm::SmallVector<mlir::Type> tiv(args.size(), loopVarType);
- llvm::SmallVector<mlir::Location> locs(args.size(), loc);
- firOpBuilder.createBlock(&op.getRegion(), {}, tiv, locs);
- // The argument is not currently in memory, so make a temporary for the
- // argument, and store it there, then bind that location to the argument.
- mlir::Operation *storeOp = nullptr;
- for (auto [argIndex, argSymbol] : llvm::enumerate(args)) {
- mlir::Value indexVal =
- fir::getBase(op.getRegion().front().getArgument(argIndex));
- storeOp =
- createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
+ auto regionArgs =
+ [&]() -> llvm::SmallVector<const Fortran::semantics::Symbol *> {
+ if (genRegionEntryCB != nullptr) {
+ return genRegionEntryCB(op);
}
- firOpBuilder.setInsertionPointAfter(storeOp);
- } else {
+
firOpBuilder.createBlock(&op.getRegion());
- }
+ return {};
+ }();
// Mark the earliest insertion point.
mlir::Operation *marker = insertMarker(firOpBuilder);
@@ -2399,8 +2479,8 @@ static void createBodyOfOp(
assert(tempDsp.has_value());
tempDsp->processStep2(op, isLoop);
} else {
- if (isLoop && args.size() > 0)
- dsp->setLoopIV(converter.getSymbolAddress(*args[0]));
+ if (isLoop && regionArgs.size() > 0)
+ dsp->setLoopIV(converter.getSymbolAddress(*regionArgs[0]));
dsp->processStep2(op, isLoop);
}
}
@@ -2476,16 +2556,19 @@ static void genBodyOfTargetDataOp(
}
template <typename OpTy, typename... Args>
-static OpTy genOpWithBody(Fortran::lower::AbstractConverter &converter,
- Fortran::lower::pft::Evaluation &eval, bool genNested,
- mlir::Location currentLocation, bool outerCombined,
- const Fortran::parser::OmpClauseList *clauseList,
- Args &&...args) {
+static OpTy genOpWithBody(
+ Fortran::lower::AbstractConverter &converter,
+ Fortran::lower::pft::Evaluation &eval, bool genNested,
+ mlir::Location currentLocation, bool outerCombined,
+ const Fortran::parser::OmpClauseList *clauseList,
+ std::function<llvm::SmallVector<const Fortran::semantics::Symbol *>(
+ mlir::Operation *)>
+ genRegionEntryCB,
+ DataSharingProcessor *dsp, Args &&...args) {
auto op = converter.getFirOpBuilder().create<OpTy>(
currentLocation, std::forward<Args>(args)...);
createBodyOfOp<OpTy>(op, converter, currentLocation, eval, genNested,
- clauseList,
- /*args=*/{}, outerCombined);
+ clauseList, genRegionEntryCB, outerCombined, dsp);
return op;
}
@@ -2493,11 +2576,12 @@ static mlir::omp::MasterOp
genMasterOp(Fortran::lower::AbstractConverter &converter,
Fortran::lower::pft::Evaluation &eval, bool genNested,
mlir::Location currentLocation) {
- return genOpWithBody<mlir::omp::MasterOp>(converter, eval, genNested,
- currentLocation,
- /*outerCombined=*/false,
- /*clauseList=*/nullptr,
- /*resultTypes=*/mlir::TypeRange());
+ return genOpWithBody<mlir::omp::MasterOp>(
+ converter, eval, genNested, currentLocation,
+ /*outerCombined=*/false,
+ /*clauseList=*/nullptr, /*genRegionEntryCB=*/nullptr,
+ /*dsp=*/nullptr,
+ /*resultTypes=*/mlir::TypeRange());
}
static mlir::omp::OrderedRegionOp
@@ -2507,11 +2591,14 @@ genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
return genOpWithBody<mlir::omp::OrderedRegionOp>(
converter, eval, genNested, currentLocation,
/*outerCombined=*/false,
- /*clauseList=*/nullptr, /*simd=*/false);
+ /*clauseList=*/nullptr, /*genRegionEntryCB=*/nullptr,
+ /*dsp=*/nullptr,
+ /*simd=*/false);
}
static mlir::omp::ParallelOp
genParallelOp(Fortran::lower::AbstractConverter &converter,
+ Fortran::lower::SymMap &symTable,
Fortran::lower::pft::Evaluation &eval, bool genNested,
mlir::Location currentLocation,
const Fortran::parser::OmpClauseList &clauseList,
@@ -2533,8 +2620,67 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
if (!outerCombined)
cp.processReduction(currentLocation, reductionVars, reductionDeclSymbols);
+ if (!enableDelayedPrivatization) {
+ return genOpWithBody<mlir::omp::ParallelOp>(
+ converter, eval, genNested, currentLocation, outerCombined, &clauseList,
+ /*genRegionEntryCB=*/nullptr, /*dsp=*/nullptr,
+ /*resultTypes=*/mlir::TypeRange(), ifClauseOperand,
+ numThreadsClauseOperand, allocateOperands, allocatorOperands,
+ reductionVars,
+ reductionDeclSymbols.empty()
+ ? nullptr
+ : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
+ reductionDeclSymbols),
+ procBindKindAttr, /*private_vars=*/llvm::SmallVector<mlir::Value>{},
+ /*privatizers=*/nullptr);
+ }
+
+ bool privatize = !outerCombined;
+ DataSharingProcessor dsp(converter, clauseList, eval,
+ /*useDelayedPrivatization=*/true, &symTable);
+
+ if (privatize) {
+ dsp.processStep1();
+ }
+
+ const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo();
+ llvm::SmallVector<mlir::Attribute> privatizers(
+ delayedPrivatizationInfo.privatizers.begin(),
+ delayedPrivatizationInfo.privatizers.end());
+
+ llvm::SmallVector<mlir::Value> privateSymAddresses(
+ delayedPrivatizationInfo.hostAddresses.begin(),
+ delayedPrivatizationInfo.hostAddresses.end());
+
+ auto genRegionEntryCB = [&](mlir::Operation *op) {
+ auto parallelOp = llvm::cast<mlir::omp::ParallelOp>(op);
+ auto privateVars = parallelOp.getPrivateVars();
+ auto &region = parallelOp.getRegion();
+ llvm::SmallVector<mlir::Type> privateVarTypes;
+ llvm::SmallVector<mlir::Location> privateVarLocs;
+
+ for (auto privateVar : privateVars) {
+ privateVarTypes.push_back(privateVar.getType());
+ privateVarLocs.push_back(privateVar.getLoc());
+ }
+
+ converter.getFirOpBuilder().createBlock(&region, {}, privateVarTypes,
+ privateVarLocs);
+
+ int argIdx = 0;
+ for (const auto *sym : delayedPrivatizationInfo.hostSymbols) {
+ converter.bindSymbol(*sym, region.getArgument(argIdx));
+ ++argIdx;
+ }
+
+ return llvm::SmallVector<const Fortran::semantics::Symbol *>(
+ delayedPrivatizationInfo.hostSymbols.begin(),
+ delayedPrivatizationInfo.hostSymbols.end());
+ };
+
return genOpWithBody<mlir::omp::ParallelOp>(
converter, eval, genNested, currentLocation, outerCombined, &clauseList,
+ genRegionEntryCB, &dsp,
/*resultTypes=*/mlir::TypeRange(), ifClauseOperand,
numThreadsClauseOperand, allocateOperands, allocatorOperands,
reductionVars,
@@ -2542,7 +2688,11 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
? nullptr
: mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
reductionDeclSymbols),
- procBindKindAttr);
+ procBindKindAttr, privateSymAddresses,
+ privatizers.empty()
+ ? nullptr
+ : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
+ privatizers));
}
static mlir::omp::SectionOp
@@ -2554,7 +2704,9 @@ genSectionOp(Fortran::lower::AbstractConverter &converter,
// all privatization is done within `omp.section` operations.
return genOpWithBody<mlir::omp::SectionOp>(
converter, eval, genNested, currentLocation,
- /*outerCombined=*/false, &sectionsClauseList);
+ /*outerCombined=*/false, &sectionsClauseList,
+ /*genRegionEntryCB=*/nullptr,
+ /*dsp=*/nullptr);
}
static mlir::omp::SingleOp
@@ -2575,8 +2727,8 @@ genSingleOp(Fortran::lower::AbstractConverter &converter,
return genOpWithBody<mlir::omp::SingleOp>(
converter, eval, genNested, currentLocation,
- /*outerCombined=*/false, &beginClauseList, allocateOperands,
- allocatorOperands, nowaitAttr);
+ /*outerCombined=*/false, &beginClauseList, /*genRegionEntryCB=*/nullptr,
+ /*dsp=*/nullptr, allocateOperands, allocatorOperands, nowaitAttr);
}
static mlir::omp::TaskOp
@@ -2608,8 +2760,9 @@ genTaskOp(Fortran::lower::AbstractConverter &converter,
return genOpWithBody<mlir::omp::TaskOp>(
converter, eval, genNested, currentLocation,
- /*outerCombined=*/false, &clauseList, ifClauseOperand, finalClauseOperand,
- untiedAttr, mergeableAttr,
+ /*outerCombined=*/false, &clauseList, /*genRegionEntryCB=*/nullptr,
+ /*dsp=*/nullptr, ifClauseOperand, finalClauseOperand, untiedAttr,
+ mergeableAttr,
/*in_reduction_vars=*/mlir::ValueRange(),
/*in_reductions=*/nullptr, priorityClauseOperand,
dependTypeOperands.empty()
@@ -2631,7 +2784,8 @@ genTaskGroupOp(Fortran::lower::AbstractConverter &converter,
currentLocation, llvm::omp::Directive::OMPD_taskgroup);
return genOpWithBody<mlir::omp::TaskGroupOp>(
converter, eval, genNested, currentLocation,
- /*outerCombined=*/false, &clauseList,
+ /*outerCombined=*/false, &clauseList, /*genRegionEntryCB=*/nullptr,
+ /*dsp=*/nullptr,
/*task_reduction_vars=*/mlir::ValueRange(),
/*task_reductions=*/nullptr, allocateOperands, allocatorOperands);
}
@@ -3015,6 +3169,8 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter,
return genOpWithBody<mlir::omp::TeamsOp>(
converter, eval, genNested, currentLocation, outerCombined, &clauseList,
+ /*genRegionEntryCB=*/nullptr,
+ /*dsp=*/nullptr,
/*num_teams_lower=*/nullptr, numTeamsClauseOperand, ifClauseOperand,
threadLimitClauseOperand, allocateOperands, allocatorOperands,
reductionVars,
@@ -3211,6 +3367,33 @@ static void convertLoopBounds(Fortran::lower::AbstractConverter &converter,
}
}
+static llvm::SmallVector<const Fortran::semantics::Symbol *> genCodeForIterVar(
+ mlir::Operation *op, Fortran::lower::AbstractConverter &converter,
+ mlir::Location &loc,
+ const llvm::SmallVector<const Fortran::semantics::Symbol *> &args) {
+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+ auto &region = op->getRegion(0);
+
+ std::size_t loopVarTypeSize = 0;
+ for (const Fortran::semantics::Symbol *arg : args)
+ loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size());
+ mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
+ llvm::SmallVector<mlir::Type> tiv(args.size(), loopVarType);
+ llvm::SmallVector<mlir::Location> locs(args.size(), loc);
+ firOpBuilder.createBlock(&region, {}, tiv, locs);
+ // The argument is not currently in memory, so make a temporary for the
+ // argument, and store it there, then bind that location to the argument.
+ mlir::Operation *storeOp = nullptr;
+ for (auto [argIndex, argSymbol] : llvm::enumerate(args)) {
+ mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex));
+ storeOp =
+ createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
+ }
+ firOpBuilder.setInsertionPointAfter(storeOp);
+
+ return args;
+}
+
static void
createSimdLoop(Fortran::lower::AbstractConverter &converter,
Fortran::lower::pft::Evaluation &eval,
@@ -3258,9 +3441,14 @@ createSimdLoop(Fortran::lower::AbstractConverter &converter,
auto *nestedEval = getCollapsedLoopEval(
eval, Fortran::lower::getCollapseValue(loopOpClauseList));
+
+ auto ivCallback = [&](mlir::Operation *op) {
+ return genCodeForIterVar(op, converter, loc, iv);
+ };
+
createBodyOfOp<mlir::omp::SimdLoopOp>(simdLoopOp, converter, loc, *nestedEval,
/*genNested=*/true, &loopOpClauseList,
- iv, /*outer=*/false, &dsp);
+ ivCallback, /*outer=*/false, &dsp);
}
static void createWsLoop(Fortran::lower::AbstractConverter &converter,
@@ -3333,8 +3521,14 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter,
auto *nestedEval = getCollapsedLoopEval(
eval, Fortran::lower::getCollapseValue(beginClauseList));
+
+ auto ivCallback = [&](mlir::Operation *op) {
+ return genCodeForIterVar(op, converter, loc, iv);
+ };
+
createBodyOfOp<mlir::omp::WsLoopOp>(wsLoopOp, converter, loc, *nestedEval,
- /*genNested=*/true, &beginClauseList, iv,
+ /*genNested=*/true, &beginClauseList,
+ ivCallback,
/*outer=*/false, &dsp);
}
@@ -3413,8 +3607,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet)
.test(ompDirective)) {
validDirective = true;
- genParallelOp(converter, eval, /*genNested=*/false, currentLocation,
- loopOpClauseList,
+ genParallelOp(converter, symTable, eval, /*genNested=*/false,
+ currentLocation, loopOpClauseList,
/*outerCombined=*/true);
}
}
@@ -3502,8 +3696,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
genOrderedRegionOp(converter, eval, /*genNested=*/true, currentLocation);
break;
case llvm::omp::Directive::OMPD_parallel:
- genParallelOp(converter, eval, /*genNested=*/true, currentLocation,
- beginClauseList);
+ genParallelOp(converter, symTable, eval, /*genNested=*/true,
+ currentLocation, beginClauseList);
break;
case llvm::omp::Directive::OMPD_single:
genSingleOp(converter, eval, /*genNested=*/true, currentLocation,
@@ -3562,8 +3756,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
.test(directive.v)) {
bool outerCombined =
directive.v != llvm::omp::Directive::OMPD_target_parallel;
- genParallelOp(converter, eval, /*genNested=*/false, currentLocation,
- beginClauseList, outerCombined);
+ genParallelOp(converter, symTable, eval, /*genNested=*/false,
+ currentLocation, beginClauseList, outerCombined);
combinedDirective = true;
}
if ((llvm::omp::workShareSet & llvm::omp::blockConstructSet)
@@ -3646,7 +3840,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
// Parallel wrapper of PARALLEL SECTIONS construct
if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
- genParallelOp(converter, eval,
+ genParallelOp(converter, symTable, eval,
/*genNested=*/false, currentLocation, sectionsClauseList,
/*outerCombined=*/true);
} else {
@@ -3663,6 +3857,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
/*genNested=*/false, currentLocation,
/*outerCombined=*/false,
/*clauseList=*/nullptr,
+ /*genRegionEntryCB=*/nullptr,
+ /*dsp=*/nullptr,
/*reduction_vars=*/mlir::ValueRange(),
/*reductions=*/nullptr, allocateOperands,
allocatorOperands, nowaitClauseOperand);
diff --git a/flang/test/Lower/OpenMP/FIR/delayed_privatization.f90 b/flang/test/Lower/OpenMP/FIR/delayed_privatization.f90
new file mode 100644
index 00000000000000..bb978bc1198af5
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/delayed_privatization.f90
@@ -0,0 +1,182 @@
+! TODO Convert this file into a bunch of lit tests for each conversion step.
+
+! RUN: bbc -fopenmp -emit-fir --openmp-enable-delayed-privatization -hlfir=false %s -o -
+
+subroutine delayed_privatization()
+ integer :: var1
+ integer :: var2
+
+ var1 = 111
+ var2 = 222
+
+!$OMP PARALLEL FIRSTPRIVATE(var1, var2)
+ var1 = var1 + var2 + 2
+!$OMP END PARALLEL
+
+end subroutine
+
+! -----------------------------------------
+! ## This is what flang emits with the PoC:
+! -----------------------------------------
+!
+! ----------------------------
+! ### Conversion to FIR + OMP:
+! ----------------------------
+!module {
+! func.func @_QPdelayed_privatization() {
+! %0 = fir.alloca i32 {bindc_name = "var1", uniq_name = "_QFdelayed_privatizationEvar1"}
+! %1 = fir.alloca i32 {bindc_name = "var2", uniq_name = "_QFdelayed_privatizationEvar2"}
+! %c111_i32 = arith.constant 111 : i32
+! fir.store %c111_i32 to %0 : !fir.ref<i32>
+! %c222_i32 = arith.constant 222 : i32
+! fir.store %c222_i32 to %1 : !fir.ref<i32>
+! omp.parallel private(@var1.privatizer %0, @var2.privatizer %1 : !fir.ref<i32>, !fir.ref<i32>) {
+! ^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<i32>):
+! %2 = fir.load %arg0 : !fir.ref<i32>
+! %3 = fir.load %arg1 : !fir.ref<i32>
+! %4 = arith.addi %2, %3 : i32
+! %c2_i32 = arith.constant 2 : i32
+! %5 = arith.addi %4, %c2_i32 : i32
+! fir.store %5 to %arg0 : !fir.ref<i32>
+! omp.terminator
+! }
+! return
+! }
+! "omp.private"() <{function_type = (!fir.ref<i32>) -> !fir.ref<i32>, sym_name = "var1.privatizer"}> ({
+! ^bb0(%arg0: !fir.ref<i32>):
+! %0 = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatizationEvar1"}
+! %1 = fir.load %arg0 : !fir.ref<i32>
+! fir.store %1 to %0 : !fir.ref<i32>
+! omp.yield(%0 : !fir.ref<i32>)
+! }) : () -> ()
+! "omp.private"() <{function_type = (!fir.ref<i32>) -> !fir.ref<i32>, sym_name = "var2.privatizer"}> ({
+! ^bb0(%arg0: !fir.ref<i32>):
+! %0 = fir.alloca i32 {bindc_name = "var2", pinned, uniq_name = "_QFdelayed_privatizationEvar2"}
+! %1 = fir.load %arg0 : !fir.ref<i32>
+! fir.store %1 to %0 : !fir.ref<i32>
+! omp.yield(%0 : !fir.ref<i32>)
+! }) : () -> ()
+!
+! -----------------------------
+! ### Conversion to LLVM + OMP:
+! -----------------------------
+!module {
+! llvm.func @_QPdelayed_privatization() {
+! %0 = llvm.mlir.constant(1 : i64) : i64
+! %1 = llvm.alloca %0 x i32 {bindc_name = "var1"} : (i64) -> !llvm.ptr
+! %2 = llvm.mlir.constant(1 : i64) : i64
+! %3 = llvm.alloca %2 x i32 {bindc_name = "var2"} : (i64) -> !llvm.ptr
+! %4 = llvm.mlir.constant(111 : i32) : i32
+! llvm.store %4, %1 : i32, !llvm.ptr
+! %5 = llvm.mlir.constant(222 : i32) : i32
+! llvm.store %5, %3 : i32, !llvm.ptr
+! omp.parallel private(@var1.privatizer %1, @var2.privatizer %3 : !llvm.ptr, !llvm.ptr) {
+! ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+! %6 = llvm.load %arg0 : !llvm.ptr -> i32
+! %7 = llvm.load %arg1 : !llvm.ptr -> i32
+! %8 = llvm.add %6, %7 : i32
+! %9 = llvm.mlir.constant(2 : i32) : i32
+! %10 = llvm.add %8, %9 : i32
+! llvm.store %10, %arg0 : i32, !llvm.ptr
+! omp.terminator
+! }
+! llvm.return
+! }
+! "omp.private"() <{function_type = (!llvm.ptr) -> !llvm.ptr, sym_name = "var1.privatizer"}> ({
+! ^bb0(%arg0: !llvm.ptr):
+! %0 = llvm.mlir.constant(1 : i64) : i64
+! %1 = llvm.alloca %0 x i32 {bindc_name = "var1", pinned} : (i64) -> !llvm.ptr
+! %2 = llvm.load %arg0 : !llvm.ptr -> i32
+! llvm.store %2, %1 : i32, !llvm.ptr
+! omp.yield(%1 : !llvm.ptr)
+! }) : () -> ()
+! "omp.private"() <{function_type = (!llvm.ptr) -> !llvm.ptr, sym_name = "var2.privatizer"}> ({
+! ^bb0(%arg0: !llvm.ptr):
+! %0 = llvm.mlir.constant(1 : i64) : i64
+! %1 = llvm.alloca %0 x i32 {bindc_name = "var2", pinned} : (i64) -> !llvm.ptr
+! %2 = llvm.load %arg0 : !llvm.ptr -> i32
+! llvm.store %2, %1 : i32, !llvm.ptr
+! omp.yield(%1 : !llvm.ptr)
+! }) : () -> ()
+!}
+!
+! --------------------------
+! ### Conversion to LLVM IR:
+! --------------------------
+!%struct.ident_t = type { i32, i32, i32, i32, ptr }
+
+!@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
+!@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
+
+!define void @_QPdelayed_privatization() {
+! %structArg = alloca { ptr, ptr }, align 8
+! %1 = alloca i32, i64 1, align 4
+! %2 = alloca i32, i64 1, align 4
+! store i32 111, ptr %1, align 4
+! store i32 222, ptr %2, align 4
+! br label %entry
+
+!entry: ; preds = %0
+! %omp_global_thread_num = call i32 @__kmpc_global_thread_num(ptr @1)
+! br label %omp_parallel
+
+!omp_parallel: ; preds = %entry
+! %gep_ = getelementptr { ptr, ptr }, ptr %structArg, i32 0, i32 0
+! store ptr %1, ptr %gep_, align 8
+! %gep_2 = getelementptr { ptr, ptr }, ptr %structArg, i32 0, i32 1
+! store ptr %2, ptr %gep_2, align 8
+! call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @_QPdelayed_privatization..omp_par, ptr %structArg)
+! br label %omp.par.outlined.exit
+
+!omp.par.outlined.exit: ; preds = %omp_parallel
+! br label %omp.par.exit.split
+
+!omp.par.exit.split: ; preds = %omp.par.outlined.exit
+! ret void
+!}
+
+!; Function Attrs: nounwind
+!define internal void @_QPdelayed_privatization..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 {
+!omp.par.entry:
+! %gep_ = getelementptr { ptr, ptr }, ptr %0, i32 0, i32 0
+! %loadgep_ = load ptr, ptr %gep_, align 8
+! %gep_1 = getelementptr { ptr, ptr }, ptr %0, i32 0, i32 1
+! %loadgep_2 = load ptr, ptr %gep_1, align 8
+! %tid.addr.local = alloca i32, align 4
+! %1 = load i32, ptr %tid.addr, align 4
+! store i32 %1, ptr %tid.addr.local, align 4
+! %tid = load i32, ptr %tid.addr.local, align 4
+! %2 = alloca i32, i64 1, align 4
+! %3 = load i32, ptr %loadgep_, align 4
+! store i32 %3, ptr %2, align 4
+! %4 = alloca i32, i64 1, align 4
+! %5 = load i32, ptr %loadgep_2, align 4
+! store i32 %5, ptr %4, align 4
+! br label %omp.par.region
+
+!omp.par.region: ; preds = %omp.par.entry
+! br label %omp.par.region1
+
+!omp.par.region1: ; preds = %omp.par.region
+! %6 = load i32, ptr %2, align 4
+! %7 = load i32, ptr %4, align 4
+! %8 = add i32 %6, %7
+! %9 = add i32 %8, 2
+! store i32 %9, ptr %2, align 4
+! br label %omp.region.cont
+
+!omp.region.cont: ; preds = %omp.par.region1
+! br label %omp.par.pre_finalize
+
+!omp.par.pre_finalize: ; preds = %omp.region.cont
+! br label %omp.par.outlined.exit.exitStub
+
+!omp.par.outlined.exit.exitStub: ; preds = %omp.par.pre_finalize
+! ret void
+!}
+
+!; Function Attrs: nounwind
+!declare i32 @__kmpc_global_thread_num(ptr) #0
+
+!; Function Attrs: nounwind
+!declare !callback !2 void @__kmpc_fork_call(ptr, i32, ptr, ...) #0
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index ca363505485773..3ee3f8fe5df8a9 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -16,6 +16,7 @@
include "mlir/IR/EnumAttr.td"
include "mlir/IR/OpBase.td"
+include "mlir/Interfaces/FunctionInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/IR/SymbolInterfaces.td"
@@ -179,7 +180,9 @@ def ParallelOp : OpenMP_Op<"parallel", [
Variadic<AnyType>:$allocators_vars,
Variadic<OpenMP_PointerLikeType>:$reduction_vars,
OptionalAttr<SymbolRefArrayAttr>:$reductions,
- OptionalAttr<ProcBindKindAttr>:$proc_bind_val);
+ OptionalAttr<ProcBindKindAttr>:$proc_bind_val,
+ Variadic<AnyType>:$private_vars,
+ OptionalAttr<SymbolRefArrayAttr>:$privatizers);
let regions = (region AnyRegion:$region);
@@ -203,6 +206,10 @@ def ParallelOp : OpenMP_Op<"parallel", [
$allocators_vars, type($allocators_vars)
) `)`
| `proc_bind` `(` custom<ClauseAttr>($proc_bind_val) `)`
+ | `private` `(`
+ custom<PrivateVarList>(
+ $private_vars, type($private_vars), $privatizers
+ ) `)`
) $region attr-dict
}];
let hasVerifier = 1;
@@ -612,7 +619,7 @@ def SimdLoopOp : OpenMP_Op<"simdloop", [AttrSizedOperandSegments,
def YieldOp : OpenMP_Op<"yield",
[Pure, ReturnLike, Terminator,
ParentOneOf<["WsLoopOp", "ReductionDeclareOp",
- "AtomicUpdateOp", "SimdLoopOp"]>]> {
+ "AtomicUpdateOp", "SimdLoopOp", "PrivateClauseOp"]>]> {
let summary = "loop yield and termination operation";
let description = [{
"omp.yield" yields SSA values from the OpenMP dialect op region and
@@ -1479,6 +1486,38 @@ def Target_UpdateDataOp: OpenMP_Op<"target_update_data",
//===----------------------------------------------------------------------===//
// 2.14.5 target construct
//===----------------------------------------------------------------------===//
+// Defines the `private` op of the OpenMP dialect (spelled `omp.private` in the
+// roundtrip test added by this patch). The op is isolated from above,
+// implements FunctionOpInterface, and carries a symbol name, a function type
+// and a single region. `omp.parallel` refers to such ops by symbol through
+// its `privatizers` attribute.
+// NOTE(review): `summary` and `description` below are still placeholders.
+def PrivateClauseOp : OpenMP_Op<"private", [
+ IsolatedFromAbove, FunctionOpInterface
+ ]> {
+ let summary = "TODO";
+ let description = [{}];
+
+ // `sym_name` is the symbol under which the op is looked up;
+ // `function_type` is the signature of the op.
+ let arguments = (ins SymbolNameAttr:$sym_name,
+ TypeAttrOf<FunctionType>:$function_type);
+
+ // The region holding the body of the op; `omp.yield` is permitted
+ // as a terminator with this op as its parent.
+ let regions = (region AnyRegion:$body);
+
+ // Convenience builder taking the type of the variable and the name of the
+ // privatizer. Its C++ definition creates a `(T) -> T` function type and an
+ // entry block with one argument of type `T`.
+ let builders = [OpBuilder<(ins
+ "::mlir::Type":$privateVarType,
+ "::llvm::StringRef":$privatizerName
+ )>];
+
+ let extraClassDeclaration = [{
+ ::mlir::Region *getCallableRegion() {
+ return &getBody();
+ }
+
+ /// Returns the argument types of this function.
+ ArrayRef<Type> getArgumentTypes() {
+ return getFunctionType().getInputs();
+ }
+
+ /// Returns the result types of this function.
+ ArrayRef<Type> getResultTypes() {
+ return getFunctionType().getResults();
+ }
+ }];
+}
def TargetOp : OpenMP_Op<"target",[IsolatedFromAbove, MapClauseOwningOpInterface,
OutlineableOpenMPOpInterface, AttrSizedOperandSegments]> {
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index 730858ffc67a71..d4ccbdf6082932 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -46,6 +46,17 @@ struct RegionOpConversion : public ConvertOpToLLVMPattern<OpType> {
*this->getTypeConverter())))
return failure();
+ if constexpr (std::is_same_v<OpType, mlir::omp::PrivateClauseOp>) {
+ auto llvmType = this->getTypeConverter()->convertType(
+ adaptor.getFunctionType().getInput(0));
+
+ if (!llvmType)
+ return rewriter.notifyMatchFailure(curOp,
+ "signature conversion failed");
+ newOp.setFunctionType(
+ FunctionType::get(rewriter.getContext(), {llvmType}, {llvmType}));
+ }
+
rewriter.eraseOp(curOp);
return success();
}
@@ -231,11 +242,12 @@ void mlir::configureOpenMPToLLVMConversionLegality(
mlir::omp::DataOp, mlir::omp::OrderedRegionOp, mlir::omp::ParallelOp,
mlir::omp::WsLoopOp, mlir::omp::SimdLoopOp, mlir::omp::MasterOp,
mlir::omp::SectionOp, mlir::omp::SectionsOp, mlir::omp::SingleOp,
- mlir::omp::TaskGroupOp, mlir::omp::TaskOp>([&](Operation *op) {
- return typeConverter.isLegal(&op->getRegion(0)) &&
- typeConverter.isLegal(op->getOperandTypes()) &&
- typeConverter.isLegal(op->getResultTypes());
- });
+ mlir::omp::TaskGroupOp, mlir::omp::TaskOp, mlir::omp::PrivateClauseOp>(
+ [&](Operation *op) {
+ return typeConverter.isLegal(&op->getRegion(0)) &&
+ typeConverter.isLegal(op->getOperandTypes()) &&
+ typeConverter.isLegal(op->getResultTypes());
+ });
target.addDynamicallyLegalOp<
mlir::omp::AtomicReadOp, mlir::omp::AtomicWriteOp, mlir::omp::FlushOp,
mlir::omp::ThreadprivateOp, mlir::omp::YieldOp, mlir::omp::EnterDataOp,
@@ -275,6 +287,7 @@ void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter,
RegionOpConversion<omp::SimdLoopOp>, RegionOpConversion<omp::SingleOp>,
RegionOpConversion<omp::TaskGroupOp>, RegionOpConversion<omp::TaskOp>,
RegionOpConversion<omp::DataOp>, RegionOpConversion<omp::TargetOp>,
+ RegionOpConversion<omp::PrivateClauseOp>,
RegionLessOpWithVarOperandsConversion<omp::AtomicWriteOp>,
RegionOpWithVarOperandsConversion<omp::AtomicUpdateOp>,
RegionLessOpWithVarOperandsConversion<omp::FlushOp>,
diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
index 2f8b3f7e11de15..889aa755d8ba46 100644
--- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
+++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
@@ -420,7 +420,9 @@ struct ParallelOpLowering : public OpRewritePattern<scf::ParallelOp> {
/* allocators_vars = */ llvm::SmallVector<Value>{},
/* reduction_vars = */ llvm::SmallVector<Value>{},
/* reductions = */ ArrayAttr{},
- /* proc_bind_val = */ omp::ClauseProcBindKindAttr{});
+ /* proc_bind_val = */ omp::ClauseProcBindKindAttr{},
+ /*private_vars=*/mlir::ValueRange{},
+ /*privatizers=*/nullptr);
{
OpBuilder::InsertionGuard guard(rewriter);
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 381f17d0804191..5d4be49369ce51 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -989,8 +989,10 @@ void ParallelOp::build(OpBuilder &builder, OperationState &state,
ParallelOp::build(
builder, state, /*if_expr_var=*/nullptr, /*num_threads_var=*/nullptr,
/*allocate_vars=*/ValueRange(), /*allocators_vars=*/ValueRange(),
- /*reduction_vars=*/ValueRange(), /*reductions=*/nullptr,
- /*proc_bind_val=*/nullptr);
+ /*reduction_vars=*/ValueRange(),
+ /*reductions=*/nullptr,
+ /*proc_bind_val=*/nullptr, /*private_vars=*/ValueRange(),
+ /*privatizers*/ nullptr);
state.addAttributes(attributes);
}
@@ -1594,6 +1596,93 @@ LogicalResult DataBoundsOp::verify() {
return success();
}
+/// Builds a privatizer op named `privatizerName` whose signature is
+/// `(privateVarType) -> privateVarType`, then gives its body region an entry
+/// block with a single argument of type `privateVarType`.
+void PrivateClauseOp::build(OpBuilder &odsBuilder, OperationState &odsState,
+                            Type privateVarType, StringRef privatizerName) {
+  MLIRContext *context = odsBuilder.getContext();
+  FunctionType signature = FunctionType::get(context, /*inputs=*/{privateVarType},
+                                             /*results=*/{privateVarType});
+
+  // Delegate to the attribute-based builder for the symbol name and the type.
+  build(odsBuilder, odsState, privatizerName, signature);
+
+  // Populate the first region of the state with the entry block.
+  mlir::Region &bodyRegion = *odsState.regions.front();
+  mlir::Block &entryBlock = bodyRegion.emplaceBlock();
+  entryBlock.addArgument(privateVarType, odsState.location);
+}
+
+/// Parses the operand list of a `private(...)` clause:
+///
+///   @privatizer %operand (`,` @privatizer %operand)* `:` type (`,` type)*
+///
+/// On success, `privateVarsOperands` and `privateVarsTypes` have the parsed
+/// operands and types appended in source order, and `privatizersAttr` holds
+/// an array of the parsed privatizer symbol references (same order as the
+/// operands).
+static ParseResult parsePrivateVarList(
+    OpAsmParser &parser,
+    llvm::SmallVector<OpAsmParser::UnresolvedOperand, 4> &privateVarsOperands,
+    llvm::SmallVector<Type, 1> &privateVarsTypes, ArrayAttr &privatizersAttr) {
+  // Collected directly as `Attribute` so that the array attribute can be built
+  // without an intermediate copy.
+  SmallVector<Attribute> privatizerSyms;
+
+  // Parses one `@privatizer %operand` pair.
+  auto parseSymAndOperand = [&]() -> ParseResult {
+    SymbolRefAttr privatizerSym;
+    OpAsmParser::UnresolvedOperand operand;
+    if (parser.parseAttribute(privatizerSym) || parser.parseOperand(operand))
+      return failure();
+
+    privatizerSyms.push_back(privatizerSym);
+    privateVarsOperands.push_back(operand);
+    return success();
+  };
+
+  // Parses one entry of the trailing type list.
+  auto parseOneType = [&]() -> ParseResult {
+    Type operandType;
+    if (parser.parseType(operandType))
+      return failure();
+    privateVarsTypes.push_back(operandType);
+    return success();
+  };
+
+  if (parser.parseCommaSeparatedList(parseSymAndOperand))
+    return failure();
+
+  privatizersAttr = ArrayAttr::get(parser.getContext(), privatizerSyms);
+
+  if (parser.parseColon())
+    return failure();
+
+  if (parser.parseCommaSeparatedList(parseOneType))
+    return failure();
+
+  return success();
+}
+
+/// Prints the operand list of a `private(...)` clause as
+/// `@privatizer %operand, ... : type, ...`, pairing the i-th symbol in
+/// `privatizersAttr` with the i-th value in `privateVars`.
+static void printPrivateVarList(OpAsmPrinter &printer, Operation *op,
+                                OperandRange privateVars,
+                                TypeRange privateVarTypes,
+                                std::optional<ArrayAttr> privatizersAttr) {
+  // TODO Add an op verifier instead of this assertion.
+  assert(
+      privateVars.size() == privateVarTypes.size() &&
+      ((privateVars.empty()) ||
+       (*privatizersAttr && (privatizersAttr->size() == privateVars.size()))));
+
+  // First the `@privatizer %operand` pairs ...
+  const unsigned numVars = privateVars.size();
+  for (unsigned varIdx = 0; varIdx < numVars; ++varIdx) {
+    assert(privatizersAttr);
+    if (varIdx != 0)
+      printer << ", ";
+    printer << (*privatizersAttr)[varIdx] << " " << privateVars[varIdx];
+  }
+
+  printer << " : ";
+
+  // ... then the operand types, in the same order.
+  const unsigned numTypes = privateVarTypes.size();
+  for (unsigned typeIdx = 0; typeIdx < numTypes; ++typeIdx) {
+    if (typeIdx != 0)
+      printer << ", ";
+    printer << privateVarTypes[typeIdx];
+  }
+}
+
#define GET_ATTRDEF_CLASSES
#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc"
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 79956f82ed141a..4de3f32c179087 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1000,6 +1000,29 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
return success();
}
+/// Replace the region arguments of the parallel op (which correspond to private
+/// variables) with the actual private variables they correspond to. This
+/// prepares the parallel op so that it matches what is expected by the
+/// OMPIRBuilder.
+///
+/// The i-th region argument is replaced by the i-th value of
+/// `opInst.getPrivateVars()`; afterwards every region argument is erased.
+static void prepareOmpParallel(omp::ParallelOp opInst) {
+  auto &region = opInst.getRegion();
+  auto privateVars = opInst.getPrivateVars();
+
+  const unsigned numArgs = region.getNumArguments();
+  assert(numArgs <= privateVars.size() &&
+         "expected a private variable for every region argument");
+
+  // Rewrite all uses of each argument, including those nested inside inner
+  // regions, so that no use is left behind when the argument is erased.
+  for (unsigned argIdx = 0; argIdx < numArgs; ++argIdx)
+    region.getArgument(argIdx).replaceAllUsesWith(privateVars[argIdx]);
+
+  // Erase back to front: erasing shifts the indices of later arguments, so a
+  // forward loop with an incrementing index would skip every other argument.
+  for (unsigned argIdx = numArgs; argIdx > 0; --argIdx)
+    region.eraseArgument(argIdx - 1);
+}
+
/// Converts the OpenMP parallel operation to LLVM IR.
static LogicalResult
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
@@ -1008,6 +1031,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
// relying on captured variables.
LogicalResult bodyGenStatus = success();
+ prepareOmpParallel(opInst);
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
@@ -1092,6 +1116,75 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
llvm::Value *&replacementValue) -> InsertPointTy {
replacementValue = &vPtr;
+ // If this is a private value, this lambda will return the corresponding
+ // mlir value and its `PrivateClauseOp`. Otherwise, empty values are
+ // returned.
+ auto [privVar,
+ privInit] = [&]() -> std::pair<mlir::Value, omp::PrivateClauseOp> {
+ if (!opInst.getPrivateVars().empty()) {
+ auto privVars = opInst.getPrivateVars();
+ auto privInits = opInst.getPrivatizers();
+ assert(privInits && privInits->size() == privVars.size());
+
+ const auto *privInitIt = privInits->begin();
+ for (auto privVarIt = privVars.begin(); privVarIt != privVars.end();
+ ++privVarIt, ++privInitIt) {
+ auto *llvmPrivVarOp = moduleTranslation.lookupValue(*privVarIt);
+ if (llvmPrivVarOp != &vPtr) {
+ continue;
+ }
+
+ auto privSym = llvm::cast<SymbolRefAttr>(*privInitIt);
+ auto privOp =
+ SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
+ opInst, privSym);
+
+ return {*privVarIt, privOp};
+ }
+ }
+
+ return {mlir::Value(), omp::PrivateClauseOp()};
+ }();
+
+ if (privVar) {
+
+ // Replace the privatizer block argument with mlir value being privatized.
+ // This way, the body of the privatizer will be changed from using the
+ // region/block argument to the value being privatized.
+ assert(privInit->getRegions().front().getNumArguments() == 1);
+
+ auto arg = privInit->getRegions().front().getArgument(0);
+ for (auto &op : privInit->getRegions().front().front()) {
+ op.replaceUsesOfWith(arg, privVar);
+ }
+
+ auto oldIP = builder.saveIP();
+ builder.restoreIP(allocaIP);
+
+ // Temporarily unlink the terminator from its parent since
+ // `inlineConvertOmpRegions` expects the insertion block to **not**
+ // contain a terminator.
+ auto &allocaTerminator = builder.GetInsertBlock()->back();
+ assert(allocaTerminator.isTerminator());
+ allocaTerminator.removeFromParent();
+
+ SmallVector<llvm::Value *, 1> yieldedValues;
+ if (failed(inlineConvertOmpRegions(privInit->getRegion(0),
+ "omp.privatizer", builder,
+ moduleTranslation, &yieldedValues))) {
+ // TODO proper error-handling.
+ builder.restoreIP(oldIP);
+ return codeGenIP;
+ }
+
+ allocaTerminator.insertAfter(&builder.GetInsertBlock()->back());
+
+ assert(yieldedValues.size() == 1);
+ replacementValue = yieldedValues.front();
+
+ builder.restoreIP(oldIP);
+ }
+
return codeGenIP;
};
@@ -3009,12 +3102,13 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
.Case([&](omp::TargetOp) {
return convertOmpTarget(*op, builder, moduleTranslation);
})
- .Case<omp::MapInfoOp, omp::DataBoundsOp>([&](auto op) {
- // No-op, should be handled by relevant owning operations e.g.
- // TargetOp, EnterDataOp, ExitDataOp, DataOp etc. and then
- // discarded
- return success();
- })
+ .Case<omp::MapInfoOp, omp::DataBoundsOp, omp::PrivateClauseOp>(
+ [&](auto op) {
+ // No-op, should be handled by relevant owning operations e.g.
+ // TargetOp, EnterDataOp, ExitDataOp, DataOp, ParallelOp (for
+ // PrivateClauseOp) etc. and then discarded
+ return success();
+ })
.Default([&](Operation *inst) {
return inst->emitError("unsupported OpenMP operation: ")
<< inst->getName();
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 65a704d18107b5..0335e5c951f24c 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -59,7 +59,7 @@ func.func @omp_parallel(%data_var : memref<i32>, %if_cond : i1, %num_threads : i
// CHECK: omp.parallel num_threads(%{{.*}} : i32) allocate(%{{.*}} : memref<i32> -> %{{.*}} : memref<i32>)
"omp.parallel"(%num_threads, %data_var, %data_var) ({
omp.terminator
- }) {operandSegmentSizes = array<i32: 0,1,1,1,0>} : (i32, memref<i32>, memref<i32>) -> ()
+ }) {operandSegmentSizes = array<i32: 0,1,1,1,0,0>} : (i32, memref<i32>, memref<i32>) -> ()
// CHECK: omp.barrier
omp.barrier
@@ -68,22 +68,22 @@ func.func @omp_parallel(%data_var : memref<i32>, %if_cond : i1, %num_threads : i
// CHECK: omp.parallel if(%{{.*}}) allocate(%{{.*}} : memref<i32> -> %{{.*}} : memref<i32>)
"omp.parallel"(%if_cond, %data_var, %data_var) ({
omp.terminator
- }) {operandSegmentSizes = array<i32: 1,0,1,1,0>} : (i1, memref<i32>, memref<i32>) -> ()
+ }) {operandSegmentSizes = array<i32: 1,0,1,1,0,0>} : (i1, memref<i32>, memref<i32>) -> ()
// test without allocate
// CHECK: omp.parallel if(%{{.*}}) num_threads(%{{.*}} : i32)
"omp.parallel"(%if_cond, %num_threads) ({
omp.terminator
- }) {operandSegmentSizes = array<i32: 1,1,0,0,0>} : (i1, i32) -> ()
+ }) {operandSegmentSizes = array<i32: 1,1,0,0,0,0>} : (i1, i32) -> ()
omp.terminator
- }) {operandSegmentSizes = array<i32: 1,1,1,1,0>, proc_bind_val = #omp<procbindkind spread>} : (i1, i32, memref<i32>, memref<i32>) -> ()
+ }) {operandSegmentSizes = array<i32: 1,1,1,1,0,0>, proc_bind_val = #omp<procbindkind spread>} : (i1, i32, memref<i32>, memref<i32>) -> ()
// test with multiple parameters for single variadic argument
// CHECK: omp.parallel allocate(%{{.*}} : memref<i32> -> %{{.*}} : memref<i32>)
"omp.parallel" (%data_var, %data_var) ({
omp.terminator
- }) {operandSegmentSizes = array<i32: 0,0,1,1,0>} : (memref<i32>, memref<i32>) -> ()
+ }) {operandSegmentSizes = array<i32: 0,0,1,1,0,0>} : (memref<i32>, memref<i32>) -> ()
return
}
diff --git a/mlir/test/Dialect/OpenMP/roundtrip.mlir b/mlir/test/Dialect/OpenMP/roundtrip.mlir
new file mode 100644
index 00000000000000..c6e9fab6f7f98a
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/roundtrip.mlir
@@ -0,0 +1,36 @@
+// RUN: fir-opt -verify-diagnostics %s | fir-opt | FileCheck %s
+
+// CHECK-LABEL: _QPprivate_clause
+func.func @_QPprivate_clause() {
+ %0 = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFprivate_clause_allocatableEx"}
+ %1 = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFprivate_clause_allocatableEy"}
+
+ // CHECK: omp.parallel private(@x.privatizer %0, @y.privatizer %1 : !fir.ref<i32>, !fir.ref<i32>)
+ omp.parallel private(@x.privatizer %0, @y.privatizer %1: !fir.ref<i32>, !fir.ref<i32>) {
+ omp.terminator
+ }
+ return
+}
+
+// CHECK: "omp.private"() <{function_type = (!fir.ref<i32>) -> !fir.ref<i32>, sym_name = "x.privatizer"}> ({
+"omp.private"() <{function_type = (!fir.ref<i32>) -> !fir.ref<i32>, sym_name = "x.privatizer"}> ({
+// CHECK: ^bb0(%arg0: {{.*}}):
+^bb0(%arg0: !fir.ref<i32>):
+
+ // CHECK: %0 = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFprivate_clause_allocatableEx"}
+ %0 = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFprivate_clause_allocatableEx"}
+
+ // CHECK: omp.yield(%0 : !fir.ref<i32>)
+ omp.yield(%0 : !fir.ref<i32>)
+}) : () -> ()
+
+// CHECK: "omp.private"() <{function_type = (!fir.ref<i32>) -> !fir.ref<i32>, sym_name = "y.privatizer"}> ({
+"omp.private"() <{function_type = (!fir.ref<i32>) -> !fir.ref<i32>, sym_name = "y.privatizer"}> ({
+^bb0(%arg0: !fir.ref<i32>):
+
+ // CHECK: %0 = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFprivate_clause_allocatableEy"}
+ %0 = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFprivate_clause_allocatableEy"}
+
+ // CHECK: omp.yield(%0 : !fir.ref<i32>)
+ omp.yield(%0 : !fir.ref<i32>)
+}) : () -> ()
>From 717b57ad03127edc6031c0f6ef489a9498958241 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 7 Feb 2024 04:21:42 -0600
Subject: [PATCH 2/2] Experiment with supporting delayed privatization for
hlfir simple values
---
flang/include/flang/Lower/SymbolMap.h | 1 +
flang/lib/Lower/Bridge.cpp | 5 +-
flang/lib/Optimizer/CodeGen/CodeGen.cpp | 18 ++++-
.../OpenMP/FIR/delayed_privatization.f90 | 1 +
.../FIR/delayed_privatization_hlfir.f90 | 71 +++++++++++++++++++
5 files changed, 93 insertions(+), 3 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/FIR/delayed_privatization_hlfir.f90
diff --git a/flang/include/flang/Lower/SymbolMap.h b/flang/include/flang/Lower/SymbolMap.h
index a55e4b133fe0a8..9f18d63ce3e7dd 100644
--- a/flang/include/flang/Lower/SymbolMap.h
+++ b/flang/include/flang/Lower/SymbolMap.h
@@ -312,6 +312,7 @@ class SymMap {
lookupVariableDefinition(semantics::SymbolRef sym) {
if (auto symBox = lookupSymbol(sym))
return symBox.getIfFortranVariableOpInterface();
+
return std::nullopt;
}
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 7a0804d57ff3ad..226792c9f346e5 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1052,7 +1052,10 @@ class FirConverter : public Fortran::lower::AbstractConverter {
if (sym.detailsIf<Fortran::semantics::CommonBlockDetails>())
return symMap->lookupSymbol(sym);
- return {};
+ // With delayed privatization, Fortran symbols might now be mapped to
+ // simple `mlir::Value`s (arguments to the `omp.private` ops in this
+ // case). Therefore, it is possible that none of the above cases applies.
+ // return {};
}
if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym))
return v;
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index f89f28c006dece..e285a9a72bd9b0 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3505,6 +3505,18 @@ struct ZeroOpConversion : public FIROpConversion<fir::ZeroOp> {
}
};
+/// Lowers `fir.declare` by replacing the op with its memref operand.
+class DeclareOpConversion : public FIROpConversion<fir::DeclareOp> {
+public:
+  using FIROpConversion::FIROpConversion;
+
+  /// Replaces `declareOp` with its memref operand and always succeeds.
+  mlir::LogicalResult
+  matchAndRewrite(fir::DeclareOp declareOp, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    // Forward the operand as remapped by the conversion framework rather than
+    // the operand of the original op.
+    rewriter.replaceOp(declareOp, adaptor.getMemref());
+    return mlir::success();
+  }
+};
+
+
/// `fir.unreachable` --> `llvm.unreachable`
struct UnreachableOpConversion : public FIROpConversion<fir::UnreachableOp> {
using FIROpConversion::FIROpConversion;
@@ -3856,6 +3868,7 @@ class RenameMSVCLibmFuncs
return mlir::success();
}
};
+
} // namespace
namespace {
@@ -3949,7 +3962,7 @@ class FIRToLLVMLowering
UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion,
UnreachableOpConversion, UnrealizedConversionCastOpConversion,
XArrayCoorOpConversion, XEmboxOpConversion, XReboxOpConversion,
- ZeroOpConversion>(typeConverter, options);
+ ZeroOpConversion, DeclareOpConversion>(typeConverter, options);
mlir::populateFuncToLLVMConversionPatterns(typeConverter, pattern);
mlir::populateOpenMPToLLVMConversionPatterns(typeConverter, pattern);
mlir::arith::populateArithToLLVMConversionPatterns(typeConverter, pattern);
@@ -4002,7 +4015,8 @@ class FIRToLLVMLowering
signalPassFailure();
}
- // Run pass to add comdats to functions that have weak linkage on relevant platforms
+ // Run pass to add comdats to functions that have weak linkage on relevant
+ // platforms
if (fir::getTargetTriple(mod).supportsCOMDAT()) {
mlir::OpPassManager comdatPM("builtin.module");
comdatPM.addPass(mlir::LLVM::createLLVMAddComdats());
diff --git a/flang/test/Lower/OpenMP/FIR/delayed_privatization.f90 b/flang/test/Lower/OpenMP/FIR/delayed_privatization.f90
index bb978bc1198af5..d17e3c6da3caf2 100644
--- a/flang/test/Lower/OpenMP/FIR/delayed_privatization.f90
+++ b/flang/test/Lower/OpenMP/FIR/delayed_privatization.f90
@@ -3,6 +3,7 @@
! RUN: bbc -fopenmp -emit-fir --openmp-enable-delayed-privatization -hlfir=false %s -o -
subroutine delayed_privatization()
+ implicit none
integer :: var1
integer :: var2
diff --git a/flang/test/Lower/OpenMP/FIR/delayed_privatization_hlfir.f90 b/flang/test/Lower/OpenMP/FIR/delayed_privatization_hlfir.f90
new file mode 100644
index 00000000000000..48022d95aa6732
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/delayed_privatization_hlfir.f90
@@ -0,0 +1,71 @@
+! TODO Convert this file into a bunch of lit tests for each conversion step.
+
+! RUN: bbc -fopenmp -emit-hlfir --openmp-enable-delayed-privatization %s -o -
+
+subroutine delayed_privatization()
+ implicit none
+ integer :: var1
+ integer :: var2
+
+ var1 = 111
+ var2 = 222
+
+!$OMP PARALLEL FIRSTPRIVATE(var1, var2)
+ var1 = var1 + var2 + 2
+!$OMP END PARALLEL
+
+end subroutine
+
+
+! -----------------------------------------
+! ## This is what flang emits with the PoC:
+! -----------------------------------------
+!
+! ----------------------------
+! ### Conversion to HLFIR + OMP:
+! ----------------------------
+!module {
+! func.func @_QPdelayed_privatization() {
+! %0 = fir.alloca i32 {bindc_name = "var1", uniq_name = "_QFdelayed_privatizationEvar1"}
+! %1:2 = hlfir.declare %0 {uniq_name = "_QFdelayed_privatizationEvar1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! %2 = fir.alloca i32 {bindc_name = "var2", uniq_name = "_QFdelayed_privatizationEvar2"}
+! %3:2 = hlfir.declare %2 {uniq_name = "_QFdelayed_privatizationEvar2"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! %c111_i32 = arith.constant 111 : i32
+! hlfir.assign %c111_i32 to %1#0 : i32, !fir.ref<i32>
+! %c222_i32 = arith.constant 222 : i32
+! hlfir.assign %c222_i32 to %3#0 : i32, !fir.ref<i32>
+! omp.parallel private(@var1.privatizer_0 %1#0, @var2.privatizer_0 %3#0 : !fir.ref<i32>, !fir.ref<i32>) {
+! ^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<i32>):
+! %4:2 = hlfir.declare %arg0 {uniq_name = "_QFdelayed_privatizationEvar1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! %5:2 = hlfir.declare %arg1 {uniq_name = "_QFdelayed_privatizationEvar2"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! %6 = fir.load %4#0 : !fir.ref<i32>
+! %7 = fir.load %5#0 : !fir.ref<i32>
+! %8 = arith.addi %6, %7 : i32
+! %c2_i32 = arith.constant 2 : i32
+! %9 = arith.addi %8, %c2_i32 : i32
+! hlfir.assign %9 to %4#0 : i32, !fir.ref<i32>
+! omp.terminator
+! }
+! return
+! }
+! "omp.private"() <{function_type = (!fir.ref<i32>) -> !fir.ref<i32>, sym_name = "var1.privatizer_0"}> ({
+! ^bb0(%arg0: !fir.ref<i32>):
+! %0 = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatizationEvar1"}
+! %1:2 = hlfir.declare %0 {uniq_name = "_QFdelayed_privatizationEvar1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! %2 = fir.load %arg0 : !fir.ref<i32>
+! hlfir.assign %2 to %1#0 temporary_lhs : i32, !fir.ref<i32>
+! omp.yield(%1#0 : !fir.ref<i32>)
+! }) : () -> ()
+! "omp.private"() <{function_type = (!fir.ref<i32>) -> !fir.ref<i32>, sym_name = "var2.privatizer_0"}> ({
+! ^bb0(%arg0: !fir.ref<i32>):
+! %0 = fir.alloca i32 {bindc_name = "var2", pinned, uniq_name = "_QFdelayed_privatizationEvar2"}
+! %1:2 = hlfir.declare %0 {uniq_name = "_QFdelayed_privatizationEvar2"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! %2 = fir.load %arg0 : !fir.ref<i32>
+! hlfir.assign %2 to %1#0 temporary_lhs : i32, !fir.ref<i32>
+! omp.yield(%1#0 : !fir.ref<i32>)
+! }) : () -> ()
+!}
+!
+!
+! ### After lowering `hlfir` to `fir`, conversion to LLVM + OMP -> LLVM IR produces the exact same result as for
+! `delayed_privatization.f90`.
More information about the Mlir-commits
mailing list