[flang-commits] [flang] [flang][OpenMP][MLIR] Basic support for delayed privatization code-gen (PR #81833)
Kareem Ergawy via flang-commits
flang-commits at lists.llvm.org
Tue Feb 27 20:10:16 PST 2024
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/81833
>From dc4b235b6d0591bfb557d40196ae284150372ab3 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 14 Feb 2024 04:24:06 -0600
Subject: [PATCH] [flang][OpenMP][MLIR] Basic support for delayed privatization
code-gen
Adds basic support for emitting delayed privatizers from flang. So far,
only types of symbols are supported (i.e. scalars), support for more
complicated types will be added later. This also makes sure that
reductio and delayed privatization work properly together by merging the
body-gen callbacks for both in case both clauses are present on the
parallel construct.
---
flang/include/flang/Lower/AbstractConverter.h | 6 +
flang/lib/Lower/Bridge.cpp | 13 ++-
.../lib/Lower/OpenMP/DataSharingProcessor.cpp | 107 +++++++++++++++---
flang/lib/Lower/OpenMP/DataSharingProcessor.h | 38 ++++++-
flang/lib/Lower/OpenMP/OpenMP.cpp | 91 +++++++++++++--
flang/lib/Lower/OpenMP/Utils.cpp | 6 +
flang/lib/Lower/OpenMP/Utils.h | 1 +
.../delayed-privatization-firstprivate.f90 | 29 +++++
.../FIR/delayed-privatization-private.f90 | 38 +++++++
.../delayed-privatization-firstprivate.f90 | 29 +++++
...yed-privatization-private-firstprivate.f90 | 34 ++++++
.../OpenMP/delayed-privatization-private.f90 | 28 +++++
.../delayed-privatization-reduction.f90 | 30 +++++
13 files changed, 422 insertions(+), 28 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90
create mode 100644 flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-private.f90
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-reduction.f90
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index e2af59e0aaa196..fa3fd9a8cc05ee 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -16,6 +16,7 @@
#include "flang/Common/Fortran.h"
#include "flang/Lower/LoweringOptions.h"
#include "flang/Lower/PFTDefs.h"
+#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Semantics/symbol.h"
#include "mlir/IR/Builders.h"
@@ -299,6 +300,11 @@ class AbstractConverter {
return loweringOptions;
}
+ /// Find the symbol in one level up of symbol map such as for host-association
+ /// in OpenMP code or return null.
+ virtual Fortran::lower::SymbolBox
+ lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0;
+
private:
/// Options controlling lowering behavior.
const Fortran::lower::LoweringOptions &loweringOptions;
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index f865b53f74debd..0691753defceb3 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1000,6 +1000,17 @@ class FirConverter : public Fortran::lower::AbstractConverter {
if (sym.detailsIf<Fortran::semantics::CommonBlockDetails>())
return symMap->lookupSymbol(sym);
+ // For symbols to be privatized in OMP, the symbol is mapped to an
+ // instance of `SymbolBox::Intrinsic` (i.e. a direct mapping to an MLIR
+ // SSA value). This MLIR SSA value is the block argument to the
+ // `omp.private`'s `alloc` block. If this is the case, we return this
+ // `SymbolBox::Intrinsic` value.
+ if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym))
+ return v.match(
+ [&](const Fortran::lower::SymbolBox::Intrinsic &)
+ -> Fortran::lower::SymbolBox { return v; },
+ [](const auto &) -> Fortran::lower::SymbolBox { return {}; });
+
return {};
}
if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym))
@@ -1018,7 +1029,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
/// Find the symbol in one level up of symbol map such as for host-association
/// in OpenMP code or return null.
Fortran::lower::SymbolBox
- lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) {
+ lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) override {
if (Fortran::lower::SymbolBox v = localSymbols.lookupOneLevelUpSymbol(sym))
return v;
return {};
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 136bda0b582ee3..d6287cb4bc239e 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -66,9 +66,10 @@ void DataSharingProcessor::cloneSymbol(const Fortran::semantics::Symbol *sym) {
}
void DataSharingProcessor::copyFirstPrivateSymbol(
- const Fortran::semantics::Symbol *sym) {
+ const Fortran::semantics::Symbol *sym,
+ mlir::OpBuilder::InsertPoint *copyAssignIP) {
if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate))
- converter.copyHostAssociateVar(*sym);
+ converter.copyHostAssociateVar(*sym, copyAssignIP);
}
void DataSharingProcessor::copyLastPrivateSymbol(
@@ -307,14 +308,10 @@ void DataSharingProcessor::privatize() {
for (const Fortran::semantics::Symbol *sym : privatizedSymbols) {
if (const auto *commonDet =
sym->detailsIf<Fortran::semantics::CommonBlockDetails>()) {
- for (const auto &mem : commonDet->objects()) {
- cloneSymbol(&*mem);
- copyFirstPrivateSymbol(&*mem);
- }
- } else {
- cloneSymbol(sym);
- copyFirstPrivateSymbol(sym);
- }
+ for (const auto &mem : commonDet->objects())
+ doPrivatize(&*mem);
+ } else
+ doPrivatize(sym);
}
}
@@ -338,11 +335,95 @@ void DataSharingProcessor::defaultPrivatize() {
!sym->GetUltimate().has<Fortran::semantics::NamelistDetails>() &&
!symbolsInNestedRegions.contains(sym) &&
!symbolsInParentRegions.contains(sym) &&
- !privatizedSymbols.contains(sym)) {
+ !privatizedSymbols.contains(sym))
+ doPrivatize(sym);
+ }
+}
+
+void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
+ if (!useDelayedPrivatization) {
+ cloneSymbol(sym);
+ copyFirstPrivateSymbol(sym);
+ return;
+ }
+
+ Fortran::lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
+ assert(hsb && "Host symbol box not found");
+
+ mlir::Type symType = hsb.getAddr().getType();
+ mlir::Location symLoc = hsb.getAddr().getLoc();
+ std::string privatizerName = sym->name().ToString() + ".privatizer";
+ bool isFirstPrivate =
+ sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate);
+
+ mlir::omp::PrivateClauseOp privatizerOp = [&]() {
+ auto moduleOp = firOpBuilder.getModule();
+ auto uniquePrivatizerName = fir::getTypeAsString(
+ symType, converter.getKindMap(),
+ converter.mangleName(*sym) +
+ (isFirstPrivate ? "_firstprivate" : "_private"));
+
+ if (auto existingPrivatizer =
+ moduleOp.lookupSymbol<mlir::omp::PrivateClauseOp>(
+ uniquePrivatizerName))
+ return existingPrivatizer;
+
+ auto ip = firOpBuilder.saveInsertionPoint();
+ firOpBuilder.setInsertionPoint(&moduleOp.getBodyRegion().front(),
+ moduleOp.getBodyRegion().front().begin());
+ auto result = firOpBuilder.create<mlir::omp::PrivateClauseOp>(
+ symLoc, uniquePrivatizerName, symType,
+ isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate
+ : mlir::omp::DataSharingClauseType::Private);
+
+ symTable->pushScope();
+
+ // Populate the `alloc` region.
+ {
+ mlir::Region &allocRegion = result.getAllocRegion();
+ mlir::Block *allocEntryBlock = firOpBuilder.createBlock(
+ &allocRegion, /*insertPt=*/{}, symType, symLoc);
+
+ firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
+ symTable->addSymbol(*sym, allocRegion.getArgument(0));
+ symTable->pushScope();
cloneSymbol(sym);
- copyFirstPrivateSymbol(sym);
+ firOpBuilder.create<mlir::omp::YieldOp>(
+ hsb.getAddr().getLoc(),
+ symTable->shallowLookupSymbol(*sym).getAddr());
+ symTable->popScope();
}
- }
+
+ // Populate the `copy` region if this is a `firstprivate`.
+ if (isFirstPrivate) {
+ mlir::Region ©Region = result.getCopyRegion();
+ // First block argument corresponding to the original/host value while
+ // second block argument corresponding to the privatized value.
+ mlir::Block *copyEntryBlock = firOpBuilder.createBlock(
+ ©Region, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc});
+ firOpBuilder.setInsertionPointToEnd(copyEntryBlock);
+ symTable->addSymbol(*sym, copyRegion.getArgument(0),
+ /*force=*/true);
+ symTable->pushScope();
+ symTable->addSymbol(*sym, copyRegion.getArgument(1));
+ auto ip = firOpBuilder.saveInsertionPoint();
+ copyFirstPrivateSymbol(sym, &ip);
+
+ firOpBuilder.create<mlir::omp::YieldOp>(
+ hsb.getAddr().getLoc(),
+ symTable->shallowLookupSymbol(*sym).getAddr());
+ symTable->popScope();
+ }
+
+ symTable->popScope();
+ firOpBuilder.restoreInsertionPoint(ip);
+ return result;
+ }();
+
+ delayedPrivatizationInfo.privatizers.push_back(
+ mlir::SymbolRefAttr::get(privatizerOp));
+ delayedPrivatizationInfo.originalAddresses.push_back(hsb.getAddr());
+ delayedPrivatizationInfo.symbols.push_back(sym);
}
} // namespace omp
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
index 10c0a30c09c391..9f7301df07598f 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
@@ -23,6 +23,24 @@ namespace lower {
namespace omp {
class DataSharingProcessor {
+public:
+ /// Collects all the information needed for delayed privatization. This can be
+ /// used by ops with data-sharing clauses to properly generate their regions
+ /// (e.g. add region arguments) and map the original SSA values to their
+ /// corresponding OMP region operands.
+ struct DelayedPrivatizationInfo {
+ // The list of symbols referring to delayed privatizer ops (i.e.
+ // `omp.private` ops).
+ llvm::SmallVector<mlir::SymbolRefAttr> privatizers;
+ // SSA values that correspond to "original" values being privatized.
+ // "Original" here means the SSA value outside the OpenMP region from which
+ // a clone is created inside the region.
+ llvm::SmallVector<mlir::Value> originalAddresses;
+ // Fortran symbols corresponding to the above SSA values.
+ llvm::SmallVector<const Fortran::semantics::Symbol *> symbols;
+ };
+
+private:
bool hasLastPrivateOp;
mlir::OpBuilder::InsertPoint lastPrivIP;
mlir::OpBuilder::InsertPoint insPt;
@@ -36,6 +54,9 @@ class DataSharingProcessor {
fir::FirOpBuilder &firOpBuilder;
const Fortran::parser::OmpClauseList &opClauseList;
Fortran::lower::pft::Evaluation &eval;
+ bool useDelayedPrivatization;
+ Fortran::lower::SymMap *symTable;
+ DelayedPrivatizationInfo delayedPrivatizationInfo;
bool needBarrier();
void collectSymbols(Fortran::semantics::Symbol::Flag flag);
@@ -47,10 +68,13 @@ class DataSharingProcessor {
void collectDefaultSymbols();
void privatize();
void defaultPrivatize();
+ void doPrivatize(const Fortran::semantics::Symbol *sym);
void copyLastPrivatize(mlir::Operation *op);
void insertLastPrivateCompare(mlir::Operation *op);
void cloneSymbol(const Fortran::semantics::Symbol *sym);
- void copyFirstPrivateSymbol(const Fortran::semantics::Symbol *sym);
+ void
+ copyFirstPrivateSymbol(const Fortran::semantics::Symbol *sym,
+ mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr);
void copyLastPrivateSymbol(const Fortran::semantics::Symbol *sym,
mlir::OpBuilder::InsertPoint *lastPrivIP);
void insertDeallocs();
@@ -58,10 +82,14 @@ class DataSharingProcessor {
public:
DataSharingProcessor(Fortran::lower::AbstractConverter &converter,
const Fortran::parser::OmpClauseList &opClauseList,
- Fortran::lower::pft::Evaluation &eval)
+ Fortran::lower::pft::Evaluation &eval,
+ bool useDelayedPrivatization = false,
+ Fortran::lower::SymMap *symTable = nullptr)
: hasLastPrivateOp(false), converter(converter),
firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList),
- eval(eval) {}
+ eval(eval), useDelayedPrivatization(useDelayedPrivatization),
+ symTable(symTable) {}
+
// Privatisation is split into two steps.
// Step1 performs cloning of all privatisation clauses and copying for
// firstprivates. Step1 is performed at the place where process/processStep1
@@ -80,6 +108,10 @@ class DataSharingProcessor {
assert(!loopIV && "Loop iteration variable already set");
loopIV = iv;
}
+
+ const DelayedPrivatizationInfo &getDelayedPrivatizationInfo() const {
+ return delayedPrivatizationInfo;
+ }
};
} // namespace omp
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 7953bf83cba0fe..21ad51c53d8742 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -558,6 +558,7 @@ genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
static mlir::omp::ParallelOp
genParallelOp(Fortran::lower::AbstractConverter &converter,
+ Fortran::lower::SymMap &symTable,
Fortran::semantics::SemanticsContext &semaCtx,
Fortran::lower::pft::Evaluation &eval, bool genNested,
mlir::Location currentLocation,
@@ -590,8 +591,8 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
auto reductionCallback = [&](mlir::Operation *op) {
llvm::SmallVector<mlir::Location> locs(reductionVars.size(),
currentLocation);
- auto block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {},
- reductionTypes, locs);
+ auto *block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {},
+ reductionTypes, locs);
for (auto [arg, prv] :
llvm::zip_equal(reductionSymbols, block->getArguments())) {
converter.bindSymbol(*arg, prv);
@@ -599,13 +600,78 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
return reductionSymbols;
};
- return genOpWithBody<mlir::omp::ParallelOp>(
+ OpWithBodyGenInfo genInfo =
OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
.setGenNested(genNested)
.setOuterCombined(outerCombined)
.setClauses(&clauseList)
.setReductions(&reductionSymbols, &reductionTypes)
- .setGenRegionEntryCb(reductionCallback),
+ .setGenRegionEntryCb(reductionCallback);
+
+ if (!enableDelayedPrivatization) {
+ return genOpWithBody<mlir::omp::ParallelOp>(
+ genInfo,
+ /*resultTypes=*/mlir::TypeRange(), ifClauseOperand,
+ numThreadsClauseOperand, allocateOperands, allocatorOperands,
+ reductionVars,
+ reductionDeclSymbols.empty()
+ ? nullptr
+ : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
+ reductionDeclSymbols),
+ procBindKindAttr, /*private_vars=*/llvm::SmallVector<mlir::Value>{},
+ /*privatizers=*/nullptr);
+ }
+
+ bool privatize = !outerCombined;
+ DataSharingProcessor dsp(converter, clauseList, eval,
+ /*useDelayedPrivatization=*/true, &symTable);
+
+ if (privatize)
+ dsp.processStep1();
+
+ const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo();
+
+ auto genRegionEntryCB = [&](mlir::Operation *op) {
+ auto parallelOp = llvm::cast<mlir::omp::ParallelOp>(op);
+
+ llvm::SmallVector<mlir::Location> reductionLocs(reductionVars.size(),
+ currentLocation);
+
+ mlir::OperandRange privateVars = parallelOp.getPrivateVars();
+ mlir::Region ®ion = parallelOp.getRegion();
+
+ llvm::SmallVector<mlir::Type> privateVarTypes = reductionTypes;
+ privateVarTypes.reserve(privateVarTypes.size() + privateVars.size());
+ llvm::transform(privateVars, std::back_inserter(privateVarTypes),
+ [](mlir::Value v) { return v.getType(); });
+
+ llvm::SmallVector<mlir::Location> privateVarLocs = reductionLocs;
+ privateVarLocs.reserve(privateVarLocs.size() + privateVars.size());
+ llvm::transform(privateVars, std::back_inserter(privateVarLocs),
+ [](mlir::Value v) { return v.getLoc(); });
+
+ converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{},
+ privateVarTypes, privateVarLocs);
+
+ llvm::SmallVector<const Fortran::semantics::Symbol *> allSymbols =
+ reductionSymbols;
+ allSymbols.append(delayedPrivatizationInfo.symbols);
+ for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) {
+ converter.bindSymbol(*arg, prv);
+ }
+
+ return allSymbols;
+ };
+
+ // TODO Merge with the reduction CB.
+ genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp);
+
+ llvm::SmallVector<mlir::Attribute> privatizers(
+ delayedPrivatizationInfo.privatizers.begin(),
+ delayedPrivatizationInfo.privatizers.end());
+
+ return genOpWithBody<mlir::omp::ParallelOp>(
+ genInfo,
/*resultTypes=*/mlir::TypeRange(), ifClauseOperand,
numThreadsClauseOperand, allocateOperands, allocatorOperands,
reductionVars,
@@ -613,8 +679,11 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
? nullptr
: mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
reductionDeclSymbols),
- procBindKindAttr, /*private_vars=*/llvm::SmallVector<mlir::Value>{},
- /*privatizers=*/nullptr);
+ procBindKindAttr, delayedPrivatizationInfo.originalAddresses,
+ delayedPrivatizationInfo.privatizers.empty()
+ ? nullptr
+ : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
+ privatizers));
}
static mlir::omp::SectionOp
@@ -1621,7 +1690,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet)
.test(ompDirective)) {
validDirective = true;
- genParallelOp(converter, semaCtx, eval, /*genNested=*/false,
+ genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false,
currentLocation, loopOpClauseList,
/*outerCombined=*/true);
}
@@ -1711,8 +1780,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
currentLocation);
break;
case llvm::omp::Directive::OMPD_parallel:
- genParallelOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
- beginClauseList);
+ genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
+ currentLocation, beginClauseList);
break;
case llvm::omp::Directive::OMPD_single:
genSingleOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
@@ -1769,7 +1838,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
.test(directive.v)) {
bool outerCombined =
directive.v != llvm::omp::Directive::OMPD_target_parallel;
- genParallelOp(converter, semaCtx, eval, /*genNested=*/false,
+ genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false,
currentLocation, beginClauseList, outerCombined);
combinedDirective = true;
}
@@ -1852,7 +1921,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
// Parallel wrapper of PARALLEL SECTIONS construct
if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
- genParallelOp(converter, semaCtx, eval,
+ genParallelOp(converter, symTable, semaCtx, eval,
/*genNested=*/false, currentLocation, sectionsClauseList,
/*outerCombined=*/true);
} else {
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 31b15257d18687..49517f62895dfe 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -24,6 +24,12 @@ llvm::cl::opt<bool> treatIndexAsSection(
llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."),
llvm::cl::init(true));
+llvm::cl::opt<bool> enableDelayedPrivatization(
+ "openmp-enable-delayed-privatization",
+ llvm::cl::desc(
+ "Emit `[first]private` variables as clauses on the MLIR ops."),
+ llvm::cl::init(false));
+
namespace Fortran {
namespace lower {
namespace omp {
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index c346f891f0797e..f57cd7420ce4d5 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -15,6 +15,7 @@
#include "llvm/Support/CommandLine.h"
extern llvm::cl::opt<bool> treatIndexAsSection;
+extern llvm::cl::opt<bool> enableDelayedPrivatization;
namespace fir {
class FirOpBuilder;
diff --git a/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 b/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90
new file mode 100644
index 00000000000000..122542345f104b
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90
@@ -0,0 +1,29 @@
+! Test delayed privatization for the `private` clause.
+
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_firstprivate
+ implicit none
+ integer :: var1
+
+!$OMP PARALLEL FIRSTPRIVATE(var1)
+ var1 = 10
+!$OMP END PARALLEL
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[VAR1_PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref<i32>):
+! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_firstprivateEvar1"}
+! CHECK-NEXT: omp.yield(%[[PRIV_ALLOC]] : !fir.ref<i32>)
+! CHECK: } copy {
+! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: !fir.ref<i32>, %[[PRIV_PRIV_ARG:.*]]: !fir.ref<i32>):
+! CHECK: %[[ORIG_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]] : !fir.ref<i32>
+! CHECK: fir.store %[[ORIG_VAL]] to %[[PRIV_PRIV_ARG]] : !fir.ref<i32>
+! CHECK: omp.yield(%[[PRIV_PRIV_ARG]] : !fir.ref<i32>)
+! CHECK: }
+
+! CHECK-LABEL: @_QPdelayed_privatization_firstprivate
+! CHECK: omp.parallel private(@[[VAR1_PRIVATIZER_SYM]] %{{.*}} -> %{{.*}} : !fir.ref<i32>) {
+! CHECK: omp.terminator
+
diff --git a/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 b/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90
new file mode 100644
index 00000000000000..2e9995ea1fd4c4
--- /dev/null
+++ b/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90
@@ -0,0 +1,38 @@
+! Test delayed privatization for the `private` clause.
+
+! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_private
+ implicit none
+ integer :: var1
+
+!$OMP PARALLEL PRIVATE(var1)
+ var1 = 10
+!$OMP END PARALLEL
+
+!$OMP PARALLEL PRIVATE(var1)
+ var1 = 20
+!$OMP END PARALLEL
+
+end subroutine
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref<i32>):
+! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! CHECK-NEXT: omp.yield(%[[PRIV_ALLOC]] : !fir.ref<i32>)
+! CHECK-NOT: } copy {
+
+! CHECK-LABEL: @_QPdelayed_privatization_private
+! CHECK: %[[ORIG_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! CHECK: omp.parallel private(@[[PRIVATIZER_SYM]] %[[ORIG_ALLOC]] -> %[[PAR_ARG:.*]] : !fir.ref<i32>) {
+! CHECK: %[[C10:.*]] = arith.constant 10 : i32
+! CHECK: fir.store %[[C10]] to %[[PAR_ARG]] : !fir.ref<i32>
+! CHECK: omp.terminator
+
+! Test that the same privatizer is used if the a variable with the same type and
+! name was previously privatized.
+! CHECK: omp.parallel private(@[[PRIVATIZER_SYM]] %[[ORIG_ALLOC]] -> %[[PAR_ARG:.*]] : !fir.ref<i32>) {
+! CHECK: %[[C20:.*]] = arith.constant 20 : i32
+! CHECK: fir.store %[[C20]] to %[[PAR_ARG]] : !fir.ref<i32>
+! CHECK: omp.terminator
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90
new file mode 100644
index 00000000000000..e3d2a5a8af2608
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90
@@ -0,0 +1,29 @@
+! Test delayed privatization for the `firstprivate` clause.
+
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_firstprivate
+ implicit none
+ integer :: var1
+
+!$omp parallel firstprivate(var1)
+ var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[VAR1_PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref<i32>):
+! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_firstprivateEvar1"}
+! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]] {uniq_name = "_QFdelayed_privatization_firstprivateEvar1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : !fir.ref<i32>)
+! CHECK: } copy {
+! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: !fir.ref<i32>, %[[PRIV_PRIV_ARG:.*]]: !fir.ref<i32>):
+! CHECK: %[[ORIG_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]] : !fir.ref<i32>
+! CHECK: hlfir.assign %[[ORIG_VAL]] to %[[PRIV_PRIV_ARG]] temporary_lhs : i32, !fir.ref<i32>
+! CHECK: omp.yield(%[[PRIV_PRIV_ARG]] : !fir.ref<i32>)
+! CHECK: }
+
+! CHECK-LABEL: @_QPdelayed_privatization_firstprivate
+! CHECK: omp.parallel private(@[[VAR1_PRIVATIZER_SYM]] %{{.*}} -> %{{.*}} : !fir.ref<i32>) {
+! CHECK: omp.terminator
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90
new file mode 100644
index 00000000000000..46eef6eb3bcf6a
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90
@@ -0,0 +1,34 @@
+! Test delayed privatization for both `private` and `firstprivate` clauses.
+
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_private_firstprivate
+ implicit none
+ integer :: var1
+ integer :: var2
+
+!$omp parallel private(var1) firstprivate(var2)
+ var1 = 10
+ var2 = var1 + var2
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[VAR2_PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+! CHECK: } copy {
+! CHECK: }
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[VAR1_PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPdelayed_privatization_private_firstprivate() {
+! CHECK: %[[VAR1_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1"
+! CHECK: %[[VAR1_DECL:.*]]:2 = hlfir.declare %[[VAR1_ALLOC]]
+
+! CHECK: %[[VAR2_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var2"
+! CHECK: %[[VAR2_DECL:.*]]:2 = hlfir.declare %[[VAR2_ALLOC]]
+
+! CHECK: omp.parallel private(
+! CHECK-SAME: @[[VAR1_PRIVATIZER_SYM]] %[[VAR1_DECL]]#0 -> %{{.*}} : !fir.ref<i32>,
+! CHECK-SAME: @[[VAR2_PRIVATIZER_SYM]] %[[VAR2_DECL]]#0 -> %{{.*}} : !fir.ref<i32>) {
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-private.f90 b/flang/test/Lower/OpenMP/delayed-privatization-private.f90
new file mode 100644
index 00000000000000..240e0e71bfcd16
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-private.f90
@@ -0,0 +1,28 @@
+! Test delayed privatization for the `private` clause.
+
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_private
+ implicit none
+ integer :: var1
+
+!$omp parallel private(var1)
+ var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref<i32>):
+! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]] {uniq_name = "_QFdelayed_privatization_privateEvar1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : !fir.ref<i32>)
+! CHECK-NOT: } copy {
+
+! CHECK-LABEL: @_QPdelayed_privatization_private
+! CHECK: %[[ORIG_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! CHECK: %[[ORIG_DECL:.*]]:2 = hlfir.declare %[[ORIG_ALLOC]] {uniq_name = "_QFdelayed_privatization_privateEvar1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: omp.parallel private(@[[PRIVATIZER_SYM]] %[[ORIG_DECL]]#0 -> %[[PAR_ARG:.*]] : !fir.ref<i32>) {
+! CHECK: %[[PAR_ARG_DECL:.*]]:2 = hlfir.declare %[[PAR_ARG]] {uniq_name = "_QFdelayed_privatization_privateEvar1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: hlfir.assign %{{.*}} to %[[PAR_ARG_DECL]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.terminator
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90 b/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90
new file mode 100644
index 00000000000000..c61f352b9b055a
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90
@@ -0,0 +1,30 @@
+! Test that reductions and delayed privatization work properly togehter. Since
+! both types of clauses add block arguments to the OpenMP region, we make sure
+! that the block arguments are added in the proper order (reductions first and
+! then delayed privatization.
+
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine red_and_delayed_private
+ integer :: red
+ integer :: prv
+
+ red = 0
+ prv = 10
+
+ !$omp parallel reduction(+:red) private(prv)
+ red = red + 1
+ prv = 20
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+
+! CHECK-LABEL: omp.reduction.declare
+! CHECK-SAME: @[[REDUCTION_SYM:.*]] : i32 init
+
+! CHECK-LABEL: _QPred_and_delayed_private
+! CHECK: omp.parallel
+! CHECK-SAME: reduction(@[[REDUCTION_SYM]] %{{.*}} -> %arg0 : !fir.ref<i32>)
+! CHECK-SAME: private(@[[PRIVATIZER_SYM]] %{{.*}} -> %arg1 : !fir.ref<i32>) {
More information about the flang-commits
mailing list