[flang-commits] [flang] [flang] Extend localization support for `do concurrent` (`init` regions) (PR #142564)
Kareem Ergawy via flang-commits
flang-commits at lists.llvm.org
Tue Jun 3 02:27:51 PDT 2025
https://github.com/ergawy created https://github.com/llvm/llvm-project/pull/142564
Extends support for locality specifiers in `do concurrent` by supporting data types that need `init` regions.
This further unifies the paths taken by the compiler for OpenMP privatization clauses and `do concurrent` locality specifiers.
From 39a1fcb63a883d82fabbed9d652caffb70b444ff Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Tue, 3 Jun 2025 03:00:07 -0500
Subject: [PATCH] [flang] Extend localization support for `do concurrent`
(`init` regions)
Extends support for locality specifiers in `do concurrent` by supporting
data types that need `init` regions.
This further unifies the paths taken by the compiler for OpenMP
privatization clauses and `do concurrent` locality specifiers.
---
.../Lower/Support}/PrivateReductionUtils.h | 12 +-
flang/include/flang/Lower/Support/Utils.h | 4 +-
flang/lib/Lower/Bridge.cpp | 33 +-
flang/lib/Lower/CMakeLists.txt | 2 +-
.../lib/Lower/OpenMP/DataSharingProcessor.cpp | 34 +-
.../Lower/OpenMP/PrivateReductionUtils.cpp | 63 +-
flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 2 +-
.../Lower/Support/PrivateReductionUtils.cpp | 681 ++++++++++++++++++
flang/lib/Lower/Support/Utils.cpp | 56 +-
.../do_concurrent_local_assoc_entity.f90 | 25 +-
.../do_concurrent_local_default_init.f90 | 41 +-
11 files changed, 841 insertions(+), 112 deletions(-)
rename flang/{lib/Lower/OpenMP => include/flang/Lower/Support}/PrivateReductionUtils.h (90%)
create mode 100644 flang/lib/Lower/Support/PrivateReductionUtils.cpp
diff --git a/flang/lib/Lower/OpenMP/PrivateReductionUtils.h b/flang/include/flang/Lower/Support/PrivateReductionUtils.h
similarity index 90%
rename from flang/lib/Lower/OpenMP/PrivateReductionUtils.h
rename to flang/include/flang/Lower/Support/PrivateReductionUtils.h
index 9f8c9aee4d8ec..5e7f4e11d5a53 100644
--- a/flang/lib/Lower/OpenMP/PrivateReductionUtils.h
+++ b/flang/include/flang/Lower/Support/PrivateReductionUtils.h
@@ -37,10 +37,14 @@ class AbstractConverter;
namespace omp {
-enum class DeclOperationKind { Private, FirstPrivate, Reduction };
+enum class DeclOperationKind {
+ PrivateOrLocal,
+ FirstPrivateOrLocalInit,
+ Reduction
+};
inline bool isPrivatization(DeclOperationKind kind) {
- return (kind == DeclOperationKind::FirstPrivate) ||
- (kind == DeclOperationKind::Private);
+ return (kind == DeclOperationKind::FirstPrivateOrLocalInit) ||
+ (kind == DeclOperationKind::PrivateOrLocal);
}
inline bool isReduction(DeclOperationKind kind) {
return kind == DeclOperationKind::Reduction;
@@ -56,7 +60,7 @@ void populateByRefInitAndCleanupRegions(
mlir::Value allocatedPrivVarArg, mlir::Value moldArg,
mlir::Region &cleanupRegion, DeclOperationKind kind,
const Fortran::semantics::Symbol *sym = nullptr,
- bool cannotHaveNonDefaultLowerBounds = false);
+ bool cannotHaveNonDefaultLowerBounds = false, bool isDoConcurrent = false);
/// Generate a fir::ShapeShift op describing the provided boxed array.
/// `cannotHaveNonDefaultLowerBounds` should be set if `box` is known to have
diff --git a/flang/include/flang/Lower/Support/Utils.h b/flang/include/flang/Lower/Support/Utils.h
index 8ad3a903beee9..e544542e2ff71 100644
--- a/flang/include/flang/Lower/Support/Utils.h
+++ b/flang/include/flang/Lower/Support/Utils.h
@@ -20,6 +20,7 @@
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/BuiltinAttributes.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
namespace Fortran::lower {
@@ -98,8 +99,9 @@ bool isEqual(const Fortran::lower::ExplicitIterSpace::ArrayBases &x,
template <typename OpType, typename OperandsStructType>
void privatizeSymbol(
lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder,
- lower::SymMap &symTable, std::function<void(OpType, mlir::Type)> initGen,
+ lower::SymMap &symTable,
llvm::SetVector<const semantics::Symbol *> &allPrivatizedSymbols,
+ llvm::SmallSet<const semantics::Symbol *, 16> &mightHaveReadHostSym,
const semantics::Symbol *symToPrivatize, OperandsStructType *clauseOps);
} // end namespace Fortran::lower
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 4e6db3eaa990d..2ea838673dd21 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -12,7 +12,6 @@
#include "flang/Lower/Bridge.h"
-#include "OpenMP/DataSharingProcessor.h"
#include "flang/Lower/Allocatable.h"
#include "flang/Lower/CallInterface.h"
#include "flang/Lower/Coarray.h"
@@ -2038,44 +2037,38 @@ class FirConverter : public Fortran::lower::AbstractConverter {
bool useDelayedPriv =
enableDelayedPrivatizationStaging && doConcurrentLoopOp;
llvm::SetVector<const Fortran::semantics::Symbol *> allPrivatizedSymbols;
+ llvm::SmallSet<const Fortran::semantics::Symbol *, 16> mightHaveReadHostSym;
- for (const Fortran::semantics::Symbol *sym : info.localSymList) {
+ for (const Fortran::semantics::Symbol *symToPrivatize : info.localSymList) {
if (useDelayedPriv) {
Fortran::lower::privatizeSymbol<fir::LocalitySpecifierOp>(
- *this, this->getFirOpBuilder(), localSymbols,
- [this](fir::LocalitySpecifierOp result, mlir::Type argType) {
- TODO(this->toLocation(),
- "Localizers that need init regions are not supported yet.");
- },
- allPrivatizedSymbols, sym, &privateClauseOps);
+ *this, this->getFirOpBuilder(), localSymbols, allPrivatizedSymbols,
+ mightHaveReadHostSym, symToPrivatize, &privateClauseOps);
continue;
}
- createHostAssociateVarClone(*sym, /*skipDefaultInit=*/false);
+ createHostAssociateVarClone(*symToPrivatize, /*skipDefaultInit=*/false);
}
- for (const Fortran::semantics::Symbol *sym : info.localInitSymList) {
+ for (const Fortran::semantics::Symbol *symToPrivatize :
+ info.localInitSymList) {
if (useDelayedPriv) {
Fortran::lower::privatizeSymbol<fir::LocalitySpecifierOp>(
- *this, this->getFirOpBuilder(), localSymbols,
- [this](fir::LocalitySpecifierOp result, mlir::Type argType) {
- TODO(this->toLocation(),
- "Localizers that need init regions are not supported yet.");
- },
- allPrivatizedSymbols, sym, &privateClauseOps);
+ *this, this->getFirOpBuilder(), localSymbols, allPrivatizedSymbols,
+ mightHaveReadHostSym, symToPrivatize, &privateClauseOps);
continue;
}
- createHostAssociateVarClone(*sym, /*skipDefaultInit=*/true);
+ createHostAssociateVarClone(*symToPrivatize, /*skipDefaultInit=*/true);
const auto *hostDetails =
- sym->detailsIf<Fortran::semantics::HostAssocDetails>();
+ symToPrivatize->detailsIf<Fortran::semantics::HostAssocDetails>();
assert(hostDetails && "missing locality spec host symbol");
const Fortran::semantics::Symbol *hostSym = &hostDetails->symbol();
Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext};
Fortran::evaluate::Assignment assign{
- ea.Designate(Fortran::evaluate::DataRef{*sym}).value(),
+ ea.Designate(Fortran::evaluate::DataRef{*symToPrivatize}).value(),
ea.Designate(Fortran::evaluate::DataRef{*hostSym}).value()};
- if (Fortran::semantics::IsPointer(*sym))
+ if (Fortran::semantics::IsPointer(*symToPrivatize))
assign.u = Fortran::evaluate::Assignment::BoundsSpec{};
genAssignment(assign);
}
diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt
index bc817ff8f1f3e..9c5db2b126510 100644
--- a/flang/lib/Lower/CMakeLists.txt
+++ b/flang/lib/Lower/CMakeLists.txt
@@ -28,11 +28,11 @@ add_flang_library(FortranLower
OpenMP/DataSharingProcessor.cpp
OpenMP/Decomposer.cpp
OpenMP/OpenMP.cpp
- OpenMP/PrivateReductionUtils.cpp
OpenMP/ReductionProcessor.cpp
OpenMP/Utils.cpp
PFTBuilder.cpp
Runtime.cpp
+ Support/PrivateReductionUtils.cpp
Support/Utils.cpp
SymbolMap.cpp
VectorSubscripts.cpp
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 03109c82a976a..8b334d7a392ac 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -12,10 +12,10 @@
#include "DataSharingProcessor.h"
-#include "PrivateReductionUtils.h"
#include "Utils.h"
#include "flang/Lower/ConvertVariable.h"
#include "flang/Lower/PFTBuilder.h"
+#include "flang/Lower/Support/PrivateReductionUtils.h"
#include "flang/Lower/Support/Utils.h"
#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/BoxValue.h"
@@ -537,38 +537,10 @@ void DataSharingProcessor::privatizeSymbol(
return;
}
- auto initGen = [&](mlir::omp::PrivateClauseOp result, mlir::Type argType) {
- lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*symToPrivatize);
- assert(hsb && "Host symbol box not found");
- hlfir::Entity entity{hsb.getAddr()};
- bool cannotHaveNonDefaultLowerBounds =
- !entity.mayHaveNonDefaultLowerBounds();
-
- mlir::Region &initRegion = result.getInitRegion();
- mlir::Location symLoc = hsb.getAddr().getLoc();
- mlir::Block *initBlock = firOpBuilder.createBlock(
- &initRegion, /*insertPt=*/{}, {argType, argType}, {symLoc, symLoc});
-
- bool emitCopyRegion =
- symToPrivatize->test(semantics::Symbol::Flag::OmpFirstPrivate);
-
- populateByRefInitAndCleanupRegions(
- converter, symLoc, argType, /*scalarInitValue=*/nullptr, initBlock,
- result.getInitPrivateArg(), result.getInitMoldArg(),
- result.getDeallocRegion(),
- emitCopyRegion ? omp::DeclOperationKind::FirstPrivate
- : omp::DeclOperationKind::Private,
- symToPrivatize, cannotHaveNonDefaultLowerBounds);
- // TODO: currently there are false positives from dead uses of the mold
- // arg
- if (result.initReadsFromMold())
- mightHaveReadHostSym.insert(symToPrivatize);
- };
-
Fortran::lower::privatizeSymbol<mlir::omp::PrivateClauseOp,
mlir::omp::PrivateClauseOps>(
- converter, firOpBuilder, symTable, initGen, allPrivatizedSymbols,
- symToPrivatize, clauseOps);
+ converter, firOpBuilder, symTable, allPrivatizedSymbols,
+ mightHaveReadHostSym, symToPrivatize, clauseOps);
}
} // namespace omp
} // namespace lower
diff --git a/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp b/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp
index 268c7828ab56f..ea1c78fb0320e 100644
--- a/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp
+++ b/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp
@@ -10,8 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "PrivateReductionUtils.h"
-
+#include "flang/Lower/Support/PrivateReductionUtils.h"
#include "flang/Lower/AbstractConverter.h"
#include "flang/Lower/Allocatable.h"
#include "flang/Lower/ConvertVariable.h"
@@ -42,7 +41,8 @@ static bool hasFinalization(const Fortran::semantics::Symbol &sym) {
static void createCleanupRegion(Fortran::lower::AbstractConverter &converter,
mlir::Location loc, mlir::Type argType,
mlir::Region &cleanupRegion,
- const Fortran::semantics::Symbol *sym) {
+ const Fortran::semantics::Symbol *sym,
+ bool isDoConcurrent) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
assert(cleanupRegion.empty());
mlir::Block *block = builder.createBlock(&cleanupRegion, cleanupRegion.end(),
@@ -72,7 +72,10 @@ static void createCleanupRegion(Fortran::lower::AbstractConverter &converter,
fir::MutableBoxValue mutableBox{converted, /*lenParameters=*/{},
/*mutableProperties=*/{}};
Fortran::lower::genDeallocateIfAllocated(converter, mutableBox, loc);
- builder.create<mlir::omp::YieldOp>(loc);
+ if (isDoConcurrent)
+ builder.create<fir::YieldOp>(loc);
+ else
+ builder.create<mlir::omp::YieldOp>(loc);
return;
}
}
@@ -100,7 +103,10 @@ static void createCleanupRegion(Fortran::lower::AbstractConverter &converter,
builder.create<fir::FreeMemOp>(loc, cast);
builder.setInsertionPointAfter(ifOp);
- builder.create<mlir::omp::YieldOp>(loc);
+ if (isDoConcurrent)
+ builder.create<fir::YieldOp>(loc);
+ else
+ builder.create<mlir::omp::YieldOp>(loc);
return;
}
@@ -115,7 +121,11 @@ static void createCleanupRegion(Fortran::lower::AbstractConverter &converter,
addr = builder.createConvert(loc, heapTy, addr);
builder.create<fir::FreeMemOp>(loc, addr);
- builder.create<mlir::omp::YieldOp>(loc);
+ if (isDoConcurrent)
+ builder.create<fir::YieldOp>(loc);
+ else
+ builder.create<mlir::omp::YieldOp>(loc);
+
return;
}
@@ -273,12 +283,13 @@ class PopulateInitAndCleanupRegionsHelper {
mlir::Value allocatedPrivVarArg, mlir::Value moldArg,
mlir::Block *initBlock, mlir::Region &cleanupRegion,
DeclOperationKind kind, const Fortran::semantics::Symbol *sym,
- bool cannotHaveLowerBounds)
+ bool cannotHaveLowerBounds, bool isDoConcurrent)
: converter{converter}, builder{converter.getFirOpBuilder()}, loc{loc},
argType{argType}, scalarInitValue{scalarInitValue},
allocatedPrivVarArg{allocatedPrivVarArg}, moldArg{moldArg},
initBlock{initBlock}, cleanupRegion{cleanupRegion}, kind{kind},
- sym{sym}, cannotHaveNonDefaultLowerBounds{cannotHaveLowerBounds} {
+ sym{sym}, cannotHaveNonDefaultLowerBounds{cannotHaveLowerBounds},
+ isDoConcurrent{isDoConcurrent} {
valType = fir::unwrapRefType(argType);
}
@@ -324,8 +335,13 @@ class PopulateInitAndCleanupRegionsHelper {
/// lower bounds then we don't need to generate code to read them.
bool cannotHaveNonDefaultLowerBounds;
+ bool isDoConcurrent;
+
void createYield(mlir::Value ret) {
- builder.create<mlir::omp::YieldOp>(loc, ret);
+ if (isDoConcurrent)
+ builder.create<fir::YieldOp>(loc, ret);
+ else
+ builder.create<mlir::omp::YieldOp>(loc, ret);
}
void initTrivialType() {
@@ -429,11 +445,12 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedScalar(
/*slice=*/mlir::Value{}, lenParams);
initializeIfDerivedTypeBox(
builder, loc, box, getLoadedMoldArg(), needsInitialization,
- /*isFirstPrivate=*/kind == DeclOperationKind::FirstPrivate);
+ /*isFirstPrivate=*/kind == DeclOperationKind::FirstPrivateOrLocalInit);
fir::StoreOp lastOp =
builder.create<fir::StoreOp>(loc, box, allocatedPrivVarArg);
- createCleanupRegion(converter, loc, argType, cleanupRegion, sym);
+ createCleanupRegion(converter, loc, argType, cleanupRegion, sym,
+ isDoConcurrent);
if (ifUnallocated)
builder.setInsertionPointAfter(ifUnallocated);
@@ -470,13 +487,14 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray(
allocatedArray, shape);
initializeIfDerivedTypeBox(
builder, loc, firClass, source, needsInitialization,
- /*isFirstprivate=*/kind == DeclOperationKind::FirstPrivate);
+ /*isFirstprivate=*/kind == DeclOperationKind::FirstPrivateOrLocalInit);
builder.create<fir::StoreOp>(loc, firClass, allocatedPrivVarArg);
if (ifUnallocated)
builder.setInsertionPointAfter(ifUnallocated);
createYield(allocatedPrivVarArg);
mlir::OpBuilder::InsertionGuard guard(builder);
- createCleanupRegion(converter, loc, argType, cleanupRegion, sym);
+ createCleanupRegion(converter, loc, argType, cleanupRegion, sym,
+ isDoConcurrent);
return;
}
@@ -492,7 +510,8 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray(
"createTempFromMold decides this statically");
if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) {
mlir::OpBuilder::InsertionGuard guard(builder);
- createCleanupRegion(converter, loc, argType, cleanupRegion, sym);
+ createCleanupRegion(converter, loc, argType, cleanupRegion, sym,
+ isDoConcurrent);
} else {
assert(!isAllocatableOrPointer &&
"Pointer-like arrays must be heap allocated");
@@ -520,7 +539,7 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray(
initializeIfDerivedTypeBox(
builder, loc, box, getLoadedMoldArg(), needsInitialization,
- /*isFirstPrivate=*/kind == DeclOperationKind::FirstPrivate);
+ /*isFirstPrivate=*/kind == DeclOperationKind::FirstPrivateOrLocalInit);
builder.create<fir::StoreOp>(loc, box, allocatedPrivVarArg);
if (ifUnallocated)
@@ -548,7 +567,8 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxchar(
loc, eleTy, /*name=*/{}, /*shape=*/{}, /*lenParams=*/len);
mlir::Value boxChar = charExprHelper.createEmboxChar(privateAddr, len);
- createCleanupRegion(converter, loc, argType, cleanupRegion, sym);
+ createCleanupRegion(converter, loc, argType, cleanupRegion, sym,
+ isDoConcurrent);
builder.setInsertionPointToEnd(initBlock);
createYield(boxChar);
@@ -563,10 +583,11 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupUnboxedDerivedType(
mlir::Value moldBox = builder.create<fir::EmboxOp>(loc, boxedTy, moldArg);
initializeIfDerivedTypeBox(builder, loc, newBox, moldBox, needsInitialization,
/*isFirstPrivate=*/kind ==
- DeclOperationKind::FirstPrivate);
+ DeclOperationKind::FirstPrivateOrLocalInit);
if (sym && hasFinalization(*sym))
- createCleanupRegion(converter, loc, argType, cleanupRegion, sym);
+ createCleanupRegion(converter, loc, argType, cleanupRegion, sym,
+ isDoConcurrent);
builder.setInsertionPointToEnd(initBlock);
createYield(allocatedPrivVarArg);
@@ -637,10 +658,12 @@ void Fortran::lower::omp::populateByRefInitAndCleanupRegions(
mlir::Type argType, mlir::Value scalarInitValue, mlir::Block *initBlock,
mlir::Value allocatedPrivVarArg, mlir::Value moldArg,
mlir::Region &cleanupRegion, DeclOperationKind kind,
- const Fortran::semantics::Symbol *sym, bool cannotHaveLowerBounds) {
+ const Fortran::semantics::Symbol *sym, bool cannotHaveLowerBounds,
+ bool isDoConcurrent) {
PopulateInitAndCleanupRegionsHelper helper(
converter, loc, argType, scalarInitValue, allocatedPrivVarArg, moldArg,
- initBlock, cleanupRegion, kind, sym, cannotHaveLowerBounds);
+ initBlock, cleanupRegion, kind, sym, cannotHaveLowerBounds,
+ isDoConcurrent);
helper.populateByRefInitAndCleanupRegions();
// Often we load moldArg to check something (e.g. length parameters, shape)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index 7ef0f2a0ef7c5..d7f520e86e532 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -12,9 +12,9 @@
#include "ReductionProcessor.h"
-#include "PrivateReductionUtils.h"
#include "flang/Lower/AbstractConverter.h"
#include "flang/Lower/ConvertType.h"
+#include "flang/Lower/Support/PrivateReductionUtils.h"
#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/Complex.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
diff --git a/flang/lib/Lower/Support/PrivateReductionUtils.cpp b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
new file mode 100644
index 0000000000000..3753b77d6327f
--- /dev/null
+++ b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
@@ -0,0 +1,681 @@
+//===-- PrivateReductionUtils.cpp -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Lower/Support/PrivateReductionUtils.h"
+
+#include "flang/Lower/AbstractConverter.h"
+#include "flang/Lower/Allocatable.h"
+#include "flang/Lower/ConvertVariable.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/Character.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Builder/Runtime/Derived.h"
+#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/Support/FatalError.h"
+#include "flang/Semantics/symbol.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/Location.h"
+
+static bool hasFinalization(const Fortran::semantics::Symbol &sym) {
+ if (sym.has<Fortran::semantics::ObjectEntityDetails>())
+ if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType())
+ if (const Fortran::semantics::DerivedTypeSpec *derivedTypeSpec =
+ declTypeSpec->AsDerived())
+ return Fortran::semantics::IsFinalizable(*derivedTypeSpec);
+ return false;
+}
+
+static void createCleanupRegion(Fortran::lower::AbstractConverter &converter,
+ mlir::Location loc, mlir::Type argType,
+ mlir::Region &cleanupRegion,
+ const Fortran::semantics::Symbol *sym,
+ bool isDoConcurrent) {
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+ assert(cleanupRegion.empty());
+ mlir::Block *block = builder.createBlock(&cleanupRegion, cleanupRegion.end(),
+ {argType}, {loc});
+ builder.setInsertionPointToEnd(block);
+
+ auto typeError = [loc]() {
+ fir::emitFatalError(loc,
+ "Attempt to create an omp cleanup region "
+ "for a type that wasn't allocated",
+ /*genCrashDiag=*/true);
+ };
+
+ mlir::Type valTy = fir::unwrapRefType(argType);
+ if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(valTy)) {
+ // TODO: what about undoing init of unboxed derived types?
+ if (auto recTy = mlir::dyn_cast<fir::RecordType>(
+ fir::unwrapSequenceType(fir::dyn_cast_ptrOrBoxEleTy(boxTy)))) {
+ mlir::Type eleTy = boxTy.getEleTy();
+ if (mlir::isa<fir::PointerType, fir::HeapType>(eleTy)) {
+ mlir::Type mutableBoxTy =
+ fir::ReferenceType::get(fir::BoxType::get(eleTy));
+ mlir::Value converted =
+ builder.createConvert(loc, mutableBoxTy, block->getArgument(0));
+ if (recTy.getNumLenParams() > 0)
+ TODO(loc, "Deallocate box with length parameters");
+ fir::MutableBoxValue mutableBox{converted, /*lenParameters=*/{},
+ /*mutableProperties=*/{}};
+ Fortran::lower::genDeallocateIfAllocated(converter, mutableBox, loc);
+ if (isDoConcurrent)
+ builder.create<fir::YieldOp>(loc);
+ else
+ builder.create<mlir::omp::YieldOp>(loc);
+ return;
+ }
+ }
+
+ // TODO: just replace this whole body with
+ // Fortran::lower::genDeallocateIfAllocated (not done now to avoid test
+ // churn)
+
+ mlir::Value arg = builder.loadIfRef(loc, block->getArgument(0));
+ assert(mlir::isa<fir::BaseBoxType>(arg.getType()));
+
+ // Deallocate box
+ // The FIR type system doesn't necessarily know that this is a mutable box
+ // if we allocated the thread local array on the heap to avoid looped stack
+ // allocations.
+ mlir::Value addr =
+ hlfir::genVariableRawAddress(loc, builder, hlfir::Entity{arg});
+ mlir::Value isAllocated = builder.genIsNotNullAddr(loc, addr);
+ fir::IfOp ifOp =
+ builder.create<fir::IfOp>(loc, isAllocated, /*withElseRegion=*/false);
+ builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+
+ mlir::Value cast = builder.createConvert(
+ loc, fir::HeapType::get(fir::dyn_cast_ptrEleTy(addr.getType())), addr);
+ builder.create<fir::FreeMemOp>(loc, cast);
+
+ builder.setInsertionPointAfter(ifOp);
+ if (isDoConcurrent)
+ builder.create<fir::YieldOp>(loc);
+ else
+ builder.create<mlir::omp::YieldOp>(loc);
+ return;
+ }
+
+ if (auto boxCharTy = mlir::dyn_cast<fir::BoxCharType>(argType)) {
+ auto [addr, len] =
+ fir::factory::CharacterExprHelper{builder, loc}.createUnboxChar(
+ block->getArgument(0));
+
+ // convert addr to a heap type so it can be used with fir::FreeMemOp
+ auto refTy = mlir::cast<fir::ReferenceType>(addr.getType());
+ auto heapTy = fir::HeapType::get(refTy.getEleTy());
+ addr = builder.createConvert(loc, heapTy, addr);
+
+ builder.create<fir::FreeMemOp>(loc, addr);
+ if (isDoConcurrent)
+ builder.create<fir::YieldOp>(loc);
+ else
+ builder.create<mlir::omp::YieldOp>(loc);
+
+ return;
+ }
+
+ typeError();
+}
+
+fir::ShapeShiftOp Fortran::lower::omp::getShapeShift(
+ fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value box,
+ bool cannotHaveNonDefaultLowerBounds, bool useDefaultLowerBounds) {
+ fir::SequenceType sequenceType = mlir::cast<fir::SequenceType>(
+ hlfir::getFortranElementOrSequenceType(box.getType()));
+ const unsigned rank = sequenceType.getDimension();
+
+ llvm::SmallVector<mlir::Value> lbAndExtents;
+ lbAndExtents.reserve(rank * 2);
+ mlir::Type idxTy = builder.getIndexType();
+
+ mlir::Value oneVal;
+ auto one = [&] {
+ if (!oneVal)
+ oneVal = builder.createIntegerConstant(loc, idxTy, 1);
+ return oneVal;
+ };
+
+ if ((cannotHaveNonDefaultLowerBounds || useDefaultLowerBounds) &&
+ !sequenceType.hasDynamicExtents()) {
+ // We don't need fir::BoxDimsOp if all of the extents are statically known
+ // and we can assume default lower bounds. This helps avoid reads from the
+ // mold arg.
+ // We may also want to use default lower bounds to iterate through array
+ // elements without having to adjust each index.
+ for (int64_t extent : sequenceType.getShape()) {
+ assert(extent != sequenceType.getUnknownExtent());
+ lbAndExtents.push_back(one());
+ mlir::Value extentVal = builder.createIntegerConstant(loc, idxTy, extent);
+ lbAndExtents.push_back(extentVal);
+ }
+ } else {
+ for (unsigned i = 0; i < rank; ++i) {
+ // TODO: ideally we want to hoist box reads out of the critical section.
+ // We could do this by having box dimensions in block arguments like
+ // OpenACC does
+ mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
+ auto dimInfo =
+ builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dim);
+ lbAndExtents.push_back(useDefaultLowerBounds ? one()
+ : dimInfo.getLowerBound());
+ lbAndExtents.push_back(dimInfo.getExtent());
+ }
+ }
+
+ auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank);
+ auto shapeShift =
+ builder.create<fir::ShapeShiftOp>(loc, shapeShiftTy, lbAndExtents);
+ return shapeShift;
+}
+
+// Initialize box newBox using moldBox. These should both have the same type and
+// be boxes containing derived types e.g.
+// fir.box<!fir.type<>>
+// fir.box<!fir.heap<!fir.type<>>
+// fir.box<!fir.heap<!fir.array<fir.type<>>>
+// fir.class<...<!fir.type<>>>
+// If the type doesn't match, this does nothing
+static void initializeIfDerivedTypeBox(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Value newBox,
+ mlir::Value moldBox, bool hasInitializer,
+ bool isFirstPrivate) {
+ assert(moldBox.getType() == newBox.getType());
+ fir::BoxType boxTy = mlir::dyn_cast<fir::BoxType>(newBox.getType());
+ fir::ClassType classTy = mlir::dyn_cast<fir::ClassType>(newBox.getType());
+ if (!boxTy && !classTy)
+ return;
+
+ // remove pointer and array types in the middle
+ mlir::Type eleTy = boxTy ? boxTy.getElementType() : classTy.getEleTy();
+ mlir::Type derivedTy = fir::unwrapRefType(eleTy);
+ if (auto array = mlir::dyn_cast<fir::SequenceType>(derivedTy))
+ derivedTy = array.getElementType();
+
+ if (!fir::isa_derived(derivedTy))
+ return;
+
+ if (hasInitializer)
+ fir::runtime::genDerivedTypeInitialize(builder, loc, newBox);
+
+ if (hlfir::mayHaveAllocatableComponent(derivedTy) && !isFirstPrivate)
+ fir::runtime::genDerivedTypeInitializeClone(builder, loc, newBox, moldBox);
+}
+
+static void getLengthParameters(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value moldArg,
+ llvm::SmallVectorImpl<mlir::Value> &lenParams) {
+ // We pass derived types unboxed, so they are not self-contained entities.
+ // Assume that unboxed derived types won't need length parameters.
+ if (!hlfir::isFortranEntity(moldArg))
+ return;
+
+ hlfir::genLengthParameters(loc, builder, hlfir::Entity{moldArg}, lenParams);
+ if (lenParams.empty())
+ return;
+
+ // The verifier for EmboxOp doesn't allow length parameters when the
+ // character already has static LEN. genLengthParameters may still return them
+ // in this case.
+ auto strTy = mlir::dyn_cast<fir::CharacterType>(
+ fir::getFortranElementType(moldArg.getType()));
+
+ if (strTy && strTy.hasConstantLen())
+ lenParams.resize(0);
+}
+
+static bool
+isDerivedTypeNeedingInitialization(const Fortran::semantics::Symbol &sym) {
+ // Fortran::lower::hasDefaultInitialization returns false for ALLOCATABLE, so
+ // re-implement here.
+ // ignorePointer=true because either the pointer points to the same target as
+ // the original variable, or it is uninitialized.
+ if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType())
+ if (const Fortran::semantics::DerivedTypeSpec *derivedTypeSpec =
+ declTypeSpec->AsDerived())
+ return derivedTypeSpec->HasDefaultInitialization(
+ /*ignoreAllocatable=*/false, /*ignorePointer=*/true);
+ return false;
+}
+
+static mlir::Value generateZeroShapeForRank(fir::FirOpBuilder &builder,
+ mlir::Location loc,
+ mlir::Value moldArg) {
+ mlir::Type moldType = fir::unwrapRefType(moldArg.getType());
+ mlir::Type eleType = fir::dyn_cast_ptrOrBoxEleTy(moldType);
+ fir::SequenceType seqTy =
+ mlir::dyn_cast_if_present<fir::SequenceType>(eleType);
+ if (!seqTy)
+ return mlir::Value{};
+
+ unsigned rank = seqTy.getShape().size();
+ mlir::Value zero =
+ builder.createIntegerConstant(loc, builder.getIndexType(), 0);
+ mlir::SmallVector<mlir::Value> dims;
+ dims.resize(rank, zero);
+ mlir::Type shapeTy = fir::ShapeType::get(builder.getContext(), rank);
+ return builder.create<fir::ShapeOp>(loc, shapeTy, dims);
+}
+
+namespace {
+using namespace Fortran::lower::omp;
+/// Class to store shared data so we don't have to maintain so many function
+/// arguments
+class PopulateInitAndCleanupRegionsHelper {
+public:
+ PopulateInitAndCleanupRegionsHelper(
+ Fortran::lower::AbstractConverter &converter, mlir::Location loc,
+ mlir::Type argType, mlir::Value scalarInitValue,
+ mlir::Value allocatedPrivVarArg, mlir::Value moldArg,
+ mlir::Block *initBlock, mlir::Region &cleanupRegion,
+ DeclOperationKind kind, const Fortran::semantics::Symbol *sym,
+ bool cannotHaveLowerBounds, bool isDoConcurrent)
+ : converter{converter}, builder{converter.getFirOpBuilder()}, loc{loc},
+ argType{argType}, scalarInitValue{scalarInitValue},
+ allocatedPrivVarArg{allocatedPrivVarArg}, moldArg{moldArg},
+ initBlock{initBlock}, cleanupRegion{cleanupRegion}, kind{kind},
+ sym{sym}, cannotHaveNonDefaultLowerBounds{cannotHaveLowerBounds},
+ isDoConcurrent{isDoConcurrent} {
+ valType = fir::unwrapRefType(argType);
+ }
+
+ void populateByRefInitAndCleanupRegions();
+
+private:
+ Fortran::lower::AbstractConverter &converter;
+ fir::FirOpBuilder &builder;
+
+ mlir::Location loc;
+
+ /// The type of the block arguments passed into the init and cleanup regions
+ mlir::Type argType;
+
+ /// argType stripped of any references
+ mlir::Type valType;
+
+ /// scalarInitValue: The value scalars should be initialized to (only
+ /// valid for reductions).
+ /// allocatedPrivVarArg: The allocation for the private
+ /// variable.
+ /// moldArg: The original variable.
+ /// loadedMoldArg: The original variable, loaded. Access via
+ /// getLoadedMoldArg().
+ mlir::Value scalarInitValue, allocatedPrivVarArg, moldArg, loadedMoldArg;
+
+ /// The first block in the init region.
+ mlir::Block *initBlock;
+
+ /// The region to insert cleanup code into.
+ mlir::Region &cleanupRegion;
+
+ /// The kind of operation we are generating init/cleanup regions for.
+ DeclOperationKind kind;
+
+ /// (optional) The symbol being privatized.
+ const Fortran::semantics::Symbol *sym;
+
+ /// Any length parameters which have been fetched for the type
+ mlir::SmallVector<mlir::Value> lenParams;
+
+ /// If the source variable being privatized definitely can't have non-default
+ /// lower bounds then we don't need to generate code to read them.
+ bool cannotHaveNonDefaultLowerBounds;
+
+ bool isDoConcurrent;
+
+ void createYield(mlir::Value ret) {
+ if (isDoConcurrent)
+ builder.create<fir::YieldOp>(loc, ret);
+ else
+ builder.create<mlir::omp::YieldOp>(loc, ret);
+ }
+
+ void initTrivialType() {
+ builder.setInsertionPointToEnd(initBlock);
+ if (scalarInitValue)
+ builder.createStoreWithConvert(loc, scalarInitValue, allocatedPrivVarArg);
+ createYield(allocatedPrivVarArg);
+ }
+
+ void initBoxedPrivatePointer(fir::BaseBoxType boxTy);
+
+ /// e.g. !fir.box<!fir.heap<i32>>, !fir.box<!fir.type<....>>,
+ /// !fir.box<!fir.char<...>>
+ void initAndCleanupBoxedScalar(fir::BaseBoxType boxTy,
+ bool needsInitialization);
+
+ void initAndCleanupBoxedArray(fir::BaseBoxType boxTy,
+ bool needsInitialization);
+
+ void initAndCleanupBoxchar(fir::BoxCharType boxCharTy);
+
+ void initAndCleanupUnboxedDerivedType(bool needsInitialization);
+
+ fir::IfOp handleNullAllocatable();
+
+ // Do this lazily so that we don't load it when it is not used.
+ inline mlir::Value getLoadedMoldArg() {
+ if (loadedMoldArg)
+ return loadedMoldArg;
+ loadedMoldArg = builder.loadIfRef(loc, moldArg);
+ return loadedMoldArg;
+ }
+};
+
+} // namespace
+
+/// The initial state of a private pointer is undefined so we don't need to
+/// match the mold argument (OpenMP 5.2 end of page 106).
+void PopulateInitAndCleanupRegionsHelper::initBoxedPrivatePointer(
+ fir::BaseBoxType boxTy) {
+ assert(isPrivatization(kind));
+ // we need a shape with the right rank so that the embox op is lowered
+ // to an llvm struct of the right type. This returns nullptr if the types
+ // aren't right.
+ mlir::Value shape = generateZeroShapeForRank(builder, loc, moldArg);
+ // Just in case, do initialize the box with a null value
+ mlir::Value null = builder.createNullConstant(loc, boxTy.getEleTy());
+ mlir::Value nullBox;
+ nullBox = builder.create<fir::EmboxOp>(loc, boxTy, null, shape,
+ /*slice=*/mlir::Value{}, lenParams);
+ builder.create<fir::StoreOp>(loc, nullBox, allocatedPrivVarArg);
+ createYield(allocatedPrivVarArg);
+}
+/// Check if an allocatable box is unallocated. If so, initialize the boxAlloca
+/// to be unallocated e.g.
+/// %box_alloca = fir.alloca !fir.box<!fir.heap<...>>
+/// %addr = fir.box_addr %box
+/// if (%addr == 0) {
+/// %nullbox = fir.embox %addr
+/// fir.store %nullbox to %box_alloca
+/// } else {
+/// // ...
+/// fir.store %something to %box_alloca
+/// }
+/// omp.yield %box_alloca
+fir::IfOp PopulateInitAndCleanupRegionsHelper::handleNullAllocatable() {
+ mlir::Value addr = builder.create<fir::BoxAddrOp>(loc, getLoadedMoldArg());
+ mlir::Value isNotAllocated = builder.genIsNullAddr(loc, addr);
+ fir::IfOp ifOp = builder.create<fir::IfOp>(loc, isNotAllocated,
+ /*withElseRegion=*/true);
+ builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+ // Just embox the null address and return.
+ // We have to give the embox a shape so that the LLVM box structure has the
+ // right rank. This returns an empty value if the types don't match.
+ mlir::Value shape = generateZeroShapeForRank(builder, loc, moldArg);
+
+ mlir::Value nullBox =
+ builder.create<fir::EmboxOp>(loc, valType, addr, shape,
+ /*slice=*/mlir::Value{}, lenParams);
+ builder.create<fir::StoreOp>(loc, nullBox, allocatedPrivVarArg);
+ return ifOp;
+}
+
+void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedScalar(
+ fir::BaseBoxType boxTy, bool needsInitialization) {
+ bool isAllocatableOrPointer =
+ mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy());
+ mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy());
+ fir::IfOp ifUnallocated{nullptr};
+ if (isAllocatableOrPointer) {
+ ifUnallocated = handleNullAllocatable();
+ builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front());
+ }
+
+ mlir::Value valAlloc = builder.createHeapTemporary(loc, innerTy, /*name=*/{},
+ /*shape=*/{}, lenParams);
+ if (scalarInitValue)
+ builder.createStoreWithConvert(loc, scalarInitValue, valAlloc);
+ mlir::Value box = builder.create<fir::EmboxOp>(
+ loc, valType, valAlloc, /*shape=*/mlir::Value{},
+ /*slice=*/mlir::Value{}, lenParams);
+ initializeIfDerivedTypeBox(
+ builder, loc, box, getLoadedMoldArg(), needsInitialization,
+ /*isFirstPrivate=*/kind == DeclOperationKind::FirstPrivateOrLocalInit);
+ fir::StoreOp lastOp =
+ builder.create<fir::StoreOp>(loc, box, allocatedPrivVarArg);
+
+ createCleanupRegion(converter, loc, argType, cleanupRegion, sym,
+ isDoConcurrent);
+
+ if (ifUnallocated)
+ builder.setInsertionPointAfter(ifUnallocated);
+ else
+ builder.setInsertionPointAfter(lastOp);
+
+ createYield(allocatedPrivVarArg);
+}
+
+void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray(
+ fir::BaseBoxType boxTy, bool needsInitialization) {
+ bool isAllocatableOrPointer =
+ mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy());
+ getLengthParameters(builder, loc, getLoadedMoldArg(), lenParams);
+
+ fir::IfOp ifUnallocated{nullptr};
+ if (isAllocatableOrPointer) {
+ ifUnallocated = handleNullAllocatable();
+ builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front());
+ }
+
+ // Create the private copy from the initial fir.box:
+ hlfir::Entity source = hlfir::Entity{getLoadedMoldArg()};
+
+ // Special case for (possibly allocatable) arrays of polymorphic types
+ // e.g. !fir.class<!fir.heap<!fir.array<?x!fir.type<>>>>
+ if (source.isPolymorphic()) {
+ fir::ShapeShiftOp shape =
+ getShapeShift(builder, loc, source, cannotHaveNonDefaultLowerBounds);
+ mlir::Type arrayType = source.getElementOrSequenceType();
+ mlir::Value allocatedArray = builder.create<fir::AllocMemOp>(
+ loc, arrayType, /*typeparams=*/mlir::ValueRange{}, shape.getExtents());
+ mlir::Value firClass = builder.create<fir::EmboxOp>(loc, source.getType(),
+ allocatedArray, shape);
+ initializeIfDerivedTypeBox(
+ builder, loc, firClass, source, needsInitialization,
+ /*isFirstprivate=*/kind == DeclOperationKind::FirstPrivateOrLocalInit);
+ builder.create<fir::StoreOp>(loc, firClass, allocatedPrivVarArg);
+ if (ifUnallocated)
+ builder.setInsertionPointAfter(ifUnallocated);
+ createYield(allocatedPrivVarArg);
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ createCleanupRegion(converter, loc, argType, cleanupRegion, sym,
+ isDoConcurrent);
+ return;
+ }
+
+ // Allocating on the heap in case the whole reduction/privatization is nested
+ // inside of a loop
+ auto [temp, needsDealloc] = createTempFromMold(loc, builder, source);
+ // if needsDealloc isn't statically false, add cleanup region. Always
+ // do this for allocatable boxes because they might have been re-allocated
+ // in the body of the loop/parallel region
+
+ std::optional<int64_t> cstNeedsDealloc = fir::getIntIfConstant(needsDealloc);
+ assert(cstNeedsDealloc.has_value() &&
+ "createTempFromMold decides this statically");
+ if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) {
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ createCleanupRegion(converter, loc, argType, cleanupRegion, sym,
+ isDoConcurrent);
+ } else {
+ assert(!isAllocatableOrPointer &&
+ "Pointer-like arrays must be heap allocated");
+ }
+
+ // Put the temporary inside of a box:
+ // hlfir::genVariableBox doesn't handle non-default lower bounds
+ mlir::Value box;
+ fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, getLoadedMoldArg(),
+ cannotHaveNonDefaultLowerBounds);
+ mlir::Type boxType = getLoadedMoldArg().getType();
+ if (mlir::isa<fir::BaseBoxType>(temp.getType()))
+ // the box created by the declare from createTempFromMold is missing
+ // lower bounds info
+ box = builder.create<fir::ReboxOp>(loc, boxType, temp, shapeShift,
+ /*shift=*/mlir::Value{});
+ else
+ box = builder.create<fir::EmboxOp>(
+ loc, boxType, temp, shapeShift,
+ /*slice=*/mlir::Value{},
+ /*typeParams=*/llvm::ArrayRef<mlir::Value>{});
+
+ if (scalarInitValue)
+ builder.create<hlfir::AssignOp>(loc, scalarInitValue, box);
+
+ initializeIfDerivedTypeBox(
+ builder, loc, box, getLoadedMoldArg(), needsInitialization,
+ /*isFirstPrivate=*/kind == DeclOperationKind::FirstPrivateOrLocalInit);
+
+ builder.create<fir::StoreOp>(loc, box, allocatedPrivVarArg);
+ if (ifUnallocated)
+ builder.setInsertionPointAfter(ifUnallocated);
+ createYield(allocatedPrivVarArg);
+}
+
+void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxchar(
+ fir::BoxCharType boxCharTy) {
+ mlir::Type eleTy = boxCharTy.getEleTy();
+ builder.setInsertionPointToStart(initBlock);
+ fir::factory::CharacterExprHelper charExprHelper{builder, loc};
+ auto [addr, len] = charExprHelper.createUnboxChar(moldArg);
+
+ // Using heap temporary so that
+ // 1) It is safe to use privatization inside of big loops.
+ // 2) The lifetime can outlive the current stack frame for delayed task
+ // execution.
+ // We can't always allocate a boxchar implicitly as the type of the
+ // omp.private because the allocation potentially needs the length
+ // parameters fetched above.
+ // TODO: this deviates from the intended design for delayed task
+ // execution.
+ mlir::Value privateAddr = builder.createHeapTemporary(
+ loc, eleTy, /*name=*/{}, /*shape=*/{}, /*lenParams=*/len);
+ mlir::Value boxChar = charExprHelper.createEmboxChar(privateAddr, len);
+
+ createCleanupRegion(converter, loc, argType, cleanupRegion, sym,
+ isDoConcurrent);
+
+ builder.setInsertionPointToEnd(initBlock);
+ createYield(boxChar);
+}
+
+void PopulateInitAndCleanupRegionsHelper::initAndCleanupUnboxedDerivedType(
+ bool needsInitialization) {
+ builder.setInsertionPointToStart(initBlock);
+ mlir::Type boxedTy = fir::BoxType::get(valType);
+ mlir::Value newBox =
+ builder.create<fir::EmboxOp>(loc, boxedTy, allocatedPrivVarArg);
+ mlir::Value moldBox = builder.create<fir::EmboxOp>(loc, boxedTy, moldArg);
+ initializeIfDerivedTypeBox(builder, loc, newBox, moldBox, needsInitialization,
+ /*isFirstPrivate=*/kind ==
+ DeclOperationKind::FirstPrivateOrLocalInit);
+
+ if (sym && hasFinalization(*sym))
+ createCleanupRegion(converter, loc, argType, cleanupRegion, sym,
+ isDoConcurrent);
+
+ builder.setInsertionPointToEnd(initBlock);
+ createYield(allocatedPrivVarArg);
+}
+
+/// This is the main driver deciding how to initialize the private variable.
+void PopulateInitAndCleanupRegionsHelper::populateByRefInitAndCleanupRegions() {
+ if (isPrivatization(kind)) {
+ assert(sym && "Symbol information is required to privatize derived types");
+ assert(!scalarInitValue && "ScalarInitvalue is unused for privatization");
+ }
+ mlir::Type valTy = fir::unwrapRefType(argType);
+
+ if (fir::isa_trivial(valTy)) {
+ initTrivialType();
+ return;
+ }
+
+ bool needsInitialization =
+ sym ? isDerivedTypeNeedingInitialization(sym->GetUltimate()) : false;
+
+ if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(valTy)) {
+ builder.setInsertionPointToEnd(initBlock);
+
+ // TODO: don't do this unless it is needed
+ getLengthParameters(builder, loc, getLoadedMoldArg(), lenParams);
+
+ if (isPrivatization(kind) &&
+ mlir::isa<fir::PointerType>(boxTy.getEleTy())) {
+ initBoxedPrivatePointer(boxTy);
+ return;
+ }
+
+ mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy());
+ bool isDerived = fir::isa_derived(innerTy);
+ bool isChar = fir::isa_char(innerTy);
+ if (fir::isa_trivial(innerTy) || isDerived || isChar) {
+ // boxed non-sequence value e.g. !fir.box<!fir.heap<i32>>
+ if ((isDerived || isChar) && (isReduction(kind) || scalarInitValue))
+ TODO(loc, "Reduction of an unsupported boxed type");
+ initAndCleanupBoxedScalar(boxTy, needsInitialization);
+ return;
+ }
+
+ innerTy = fir::extractSequenceType(boxTy);
+ if (!innerTy || !mlir::isa<fir::SequenceType>(innerTy))
+ TODO(loc, "Unsupported boxed type for reduction/privatization");
+ initAndCleanupBoxedArray(boxTy, needsInitialization);
+ return;
+ }
+
+ // Unboxed types:
+ if (auto boxCharTy = mlir::dyn_cast<fir::BoxCharType>(argType)) {
+ initAndCleanupBoxchar(boxCharTy);
+ return;
+ }
+ if (fir::isa_derived(valType)) {
+ initAndCleanupUnboxedDerivedType(needsInitialization);
+ return;
+ }
+
+ TODO(loc,
+ "creating reduction/privatization init region for unsupported type");
+}
+
+void Fortran::lower::omp::populateByRefInitAndCleanupRegions(
+ Fortran::lower::AbstractConverter &converter, mlir::Location loc,
+ mlir::Type argType, mlir::Value scalarInitValue, mlir::Block *initBlock,
+ mlir::Value allocatedPrivVarArg, mlir::Value moldArg,
+ mlir::Region &cleanupRegion, DeclOperationKind kind,
+ const Fortran::semantics::Symbol *sym, bool cannotHaveLowerBounds,
+ bool isDoConcurrent) {
+ PopulateInitAndCleanupRegionsHelper helper(
+ converter, loc, argType, scalarInitValue, allocatedPrivVarArg, moldArg,
+ initBlock, cleanupRegion, kind, sym, cannotHaveLowerBounds,
+ isDoConcurrent);
+ helper.populateByRefInitAndCleanupRegions();
+
+ // Often we load moldArg to check something (e.g. length parameters, shape)
+ // but then those answers can be gotten statically without accessing the
+ // runtime value and so the only remaining use is a dead load. These loads can
+ // force us to insert additional barriers and so should be avoided where
+ // possible.
+ if (moldArg.hasOneUse()) {
+ mlir::Operation *user = *moldArg.getUsers().begin();
+ if (auto load = mlir::dyn_cast<fir::LoadOp>(user))
+ if (load.use_empty())
+ load.erase();
+ }
+}
diff --git a/flang/lib/Lower/Support/Utils.cpp b/flang/lib/Lower/Support/Utils.cpp
index 2de9db992e278..00a4da830b5c5 100644
--- a/flang/lib/Lower/Support/Utils.cpp
+++ b/flang/lib/Lower/Support/Utils.cpp
@@ -14,6 +14,7 @@
#include "flang/Common/indirection.h"
#include "flang/Lower/IterationSpace.h"
+#include "flang/Lower/Support/PrivateReductionUtils.h"
#include "flang/Semantics/tools.h"
#include <cstdint>
#include <optional>
@@ -645,17 +646,19 @@ void copyFirstPrivateSymbol(lower::AbstractConverter &converter,
template <typename OpType, typename OperandsStructType>
void privatizeSymbol(
lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder,
- lower::SymMap &symTable, std::function<void(OpType, mlir::Type)> initGen,
+ lower::SymMap &symTable,
llvm::SetVector<const semantics::Symbol *> &allPrivatizedSymbols,
+ llvm::SmallSet<const semantics::Symbol *, 16> &mightHaveReadHostSym,
const semantics::Symbol *symToPrivatize, OperandsStructType *clauseOps) {
- const semantics::Symbol *sym = symToPrivatize->HasLocalLocality()
- ? &symToPrivatize->GetUltimate()
- : symToPrivatize;
- lower::SymbolBox hsb = symToPrivatize->HasLocalLocality()
- ? converter.shallowLookupSymbol(*sym)
- : converter.lookupOneLevelUpSymbol(*sym);
+ constexpr bool isDoConcurrent =
+ std::is_same_v<OpType, fir::LocalitySpecifierOp>;
+
+ const semantics::Symbol *sym =
+ isDoConcurrent ? &symToPrivatize->GetUltimate() : symToPrivatize;
+ const lower::SymbolBox hsb = isDoConcurrent
+ ? converter.shallowLookupSymbol(*sym)
+ : converter.lookupOneLevelUpSymbol(*sym);
assert(hsb && "Host symbol box not found");
- hlfir::Entity entity{hsb.getAddr()};
mlir::Location symLoc = hsb.getAddr().getLoc();
std::string privatizerName = sym->name().ToString() + ".privatizer";
@@ -678,6 +681,7 @@ void privatizeSymbol(
// an alloca for a fir.array type there. Get around this by boxing all
// arrays.
if (mlir::isa<fir::SequenceType>(allocType)) {
+ hlfir::Entity entity{hsb.getAddr()};
entity = genVariableBox(symLoc, firOpBuilder, entity);
privVal = entity.getBase();
allocType = privVal.getType();
@@ -738,7 +742,35 @@ void privatizeSymbol(
mlir::isa<fir::BaseBoxType>(allocType) ||
mlir::isa<fir::BoxCharType>(allocType);
if (needsInitialization) {
- initGen(result, argType);
+ lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(
+ isDoConcurrent ? symToPrivatize->GetUltimate() : *symToPrivatize);
+
+ assert(hsb && "Host symbol box not found");
+ hlfir::Entity entity{hsb.getAddr()};
+ bool cannotHaveNonDefaultLowerBounds =
+ !entity.mayHaveNonDefaultLowerBounds();
+
+ mlir::Region &initRegion = result.getInitRegion();
+ mlir::Location symLoc = hsb.getAddr().getLoc();
+ mlir::Block *initBlock = firOpBuilder.createBlock(
+ &initRegion, /*insertPt=*/{}, {argType, argType}, {symLoc, symLoc});
+
+ bool emitCopyRegion =
+ symToPrivatize->test(semantics::Symbol::Flag::OmpFirstPrivate) ||
+ symToPrivatize->test(
+ Fortran::semantics::Symbol::Flag::LocalityLocalInit);
+
+ populateByRefInitAndCleanupRegions(
+ converter, symLoc, argType, /*scalarInitValue=*/nullptr, initBlock,
+ result.getInitPrivateArg(), result.getInitMoldArg(),
+ result.getDeallocRegion(),
+ emitCopyRegion ? omp::DeclOperationKind::FirstPrivateOrLocalInit
+ : omp::DeclOperationKind::PrivateOrLocal,
+ symToPrivatize, cannotHaveNonDefaultLowerBounds, isDoConcurrent);
+ // TODO: currently there are false positives from dead uses of the mold
+ // arg
+ if (result.initReadsFromMold())
+ mightHaveReadHostSym.insert(symToPrivatize);
}
// Populate the `copy` region if this is a `firstprivate`.
@@ -790,7 +822,7 @@ void privatizeSymbol(
clauseOps->privateVars.push_back(privVal);
}
- if (symToPrivatize->HasLocalLocality())
+ if (isDoConcurrent)
allPrivatizedSymbols.insert(symToPrivatize);
}
@@ -798,8 +830,8 @@ template void
privatizeSymbol<mlir::omp::PrivateClauseOp, mlir::omp::PrivateClauseOps>(
lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder,
lower::SymMap &symTable,
- std::function<void(mlir::omp::PrivateClauseOp, mlir::Type)> initGen,
llvm::SetVector<const semantics::Symbol *> &allPrivatizedSymbols,
+ llvm::SmallSet<const semantics::Symbol *, 16> &mightHaveReadHostSym,
const semantics::Symbol *symToPrivatize,
mlir::omp::PrivateClauseOps *clauseOps);
@@ -807,8 +839,8 @@ template void
privatizeSymbol<fir::LocalitySpecifierOp, fir::LocalitySpecifierOperands>(
lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder,
lower::SymMap &symTable,
- std::function<void(fir::LocalitySpecifierOp, mlir::Type)> initGen,
llvm::SetVector<const semantics::Symbol *> &allPrivatizedSymbols,
+ llvm::SmallSet<const semantics::Symbol *, 16> &mightHaveReadHostSym,
const semantics::Symbol *symToPrivatize,
fir::LocalitySpecifierOperands *clauseOps);
diff --git a/flang/test/Lower/do_concurrent_local_assoc_entity.f90 b/flang/test/Lower/do_concurrent_local_assoc_entity.f90
index 280827871aaf4..6c85ef0ec7595 100644
--- a/flang/test/Lower/do_concurrent_local_assoc_entity.f90
+++ b/flang/test/Lower/do_concurrent_local_assoc_entity.f90
@@ -1,4 +1,4 @@
-! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -mmlir --enable-delayed-privatization-staging=true -o - %s | FileCheck %s
subroutine local_assoc
implicit none
@@ -12,11 +12,22 @@ subroutine local_assoc
end associate
end subroutine local_assoc
-! CHECK: %[[C8:.*]] = arith.constant 8 : index
+! CHECK: fir.local {type = local} @[[LOCALIZER:.*local_assocEa.*]] : !fir.box<!fir.array<8xf32>> init {
+! CHECK-NEXT: ^{{.*}}(%{{.*}}: !{{.*}}, %[[LOCAL_ARG:.*]]: !{{.*}}):
+! CHECK-NEXT: %[[C8:.*]] = arith.constant 8 : index
+! CHECK-NEXT: %[[SHAPE:.*]] = fir.shape %[[C8]]
+! CHECK-NEXT: %[[TMP_ALLOC:.*]] = fir.allocmem !{{.*}} {bindc_name = ".tmp", {{.*}}}
+! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %[[TMP_ALLOC]](%[[SHAPE]])
+! CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
+! CHECK-NEXT: %[[C8:.*]] = arith.constant 8 : index
+! CHECK-NEXT: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[C1]], %[[C8]]
+! CHECK-NEXT: %[[TMP_BOX:.*]] = fir.embox %[[TMP_DECL]]#0(%[[SHAPE_SHIFT]])
+! CHECK-NEXT: fir.store %[[TMP_BOX]] to %[[LOCAL_ARG]]
+! CHECK-NEXT: fir.yield(%[[LOCAL_ARG]] : !fir.ref<!fir.box<!fir.array<8xf32>>>)
+! CHECK-NEXT: }
-! CHECK: fir.do_concurrent.loop {{.*}} {
-! CHECK: %[[LOCAL_ALLOC:.*]] = fir.alloca !fir.array<8xf32> {bindc_name = "a", pinned, uniq_name = "{{.*}}local_assocEa"}
-! CHECK: %[[LOCAL_SHAPE:.*]] = fir.shape %[[C8]] :
-! CHECK: %[[LOCAL_DECL:.*]]:2 = hlfir.declare %[[LOCAL_ALLOC]](%[[LOCAL_SHAPE]])
-! CHECK: hlfir.designate %[[LOCAL_DECL]]#0 (%{{.*}})
+! CHECK: fir.do_concurrent.loop {{.*}} local(@[[LOCALIZER]] %{{.*}} -> %[[LOCAL_ARG:.*]] : {{.*}}) {
+! CHECK: %[[LOCAL_DECL:.*]]:2 = hlfir.declare %[[LOCAL_ARG]]
+! CHECK: %[[LOCAL_LD:.*]] = fir.load %[[LOCAL_DECL]]#0 : !fir.ref<!fir.box<!fir.array<8xf32>>>
+! CHECK: hlfir.designate %[[LOCAL_LD]] (%{{.*}})
! CHECK: }
diff --git a/flang/test/Lower/do_concurrent_local_default_init.f90 b/flang/test/Lower/do_concurrent_local_default_init.f90
index 207704ac1a990..d643213854744 100644
--- a/flang/test/Lower/do_concurrent_local_default_init.f90
+++ b/flang/test/Lower/do_concurrent_local_default_init.f90
@@ -1,5 +1,5 @@
! Test default initialization of DO CONCURRENT LOCAL() entities.
-! RUN: bbc -emit-hlfir -I nowhere -o - %s | FileCheck %s
+! RUN: bbc -emit-hlfir --enable-delayed-privatization-staging=true -I nowhere -o - %s | FileCheck %s
subroutine test_ptr(p)
interface
@@ -25,28 +25,39 @@ subroutine test_default_init()
end do
call something(res)
end subroutine
+
+! CHECK: fir.local {type = local} @[[DEFAULT_INIT_LOCALIZER:.*test_default_init.*]] : !fir.type<{{.*}}test_default_initTt{i:i32}> init {
+! CHECK-NEXT: ^{{.*}}(%{{.*}}: !{{.*}}, %[[LOCAL_ARG:.*]]: !fir.ref<!fir.type<_QFtest_default_initTt{i:i32}>>):
+! CHECK-NEXT: %[[LOCAL_ARG_BOX:.*]] = fir.embox %[[LOCAL_ARG]]
+! CHECK: %[[LOCAL_ARG_BOX_CVT:.*]] = fir.convert %[[LOCAL_ARG_BOX]]
+! CHECK: fir.call @_FortranAInitialize(%[[LOCAL_ARG_BOX_CVT]], {{.*}})
+! CHECK-NEXT: fir.yield(%[[LOCAL_ARG]] : {{.*}})
+! CHECK-NEXT: }
+
+! CHECK: fir.local {type = local} @[[PTR_LOCALIZER:.*test_ptrEp_private_box.*]] : !fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>> init {
+! CHECK-NEXT: ^{{.*}}(%[[ORIG_ARG:.*]]: !{{.*}}, %[[LOCAL_ARG:.*]]: !{{.*}}):
+! CHECK-NEXT: %[[ORIG_ARG_LD:.*]] = fir.load %[[ORIG_ARG]]
+! CHECK-NEXT: %[[ELEM_SIZE:.*]] = fir.box_elesize %[[ORIG_ARG_LD]]
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : index
+! CHECK-NEXT: %[[SHAPE:.*]] = fir.shape %[[C0]]
+! CHECK-NEXT: %[[ZERO_BITS:.*]] = fir.zero_bits
+! CHECK-NEXT: %[[LOCAL_BOX:.*]] = fir.embox %[[ZERO_BITS]](%[[SHAPE]]) typeparams %[[ELEM_SIZE]]
+! CHECK-NEXT: fir.store %[[LOCAL_BOX]] to %[[LOCAL_ARG]]
+! CHECK-NEXT: fir.yield(%[[LOCAL_ARG]] : {{.*}})
+! CHECK-NEXT: }
+
! CHECK-LABEL: func.func @_QPtest_ptr(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>> {fir.bindc_name = "p"}) {
! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>
! CHECK: %[[VAL_7:.*]] = fir.box_elesize %[[VAL_6]] : (!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>) -> index
-! CHECK: fir.do_concurrent.loop
-! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>> {bindc_name = "p", pinned, uniq_name = "_QFtest_ptrEp"}
-! CHECK: %[[VAL_17:.*]] = fir.zero_bits !fir.ptr<!fir.array<?x!fir.char<1,?>>>
-! CHECK: %[[VAL_18:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_19:.*]] = fir.shape %[[VAL_18]] : (index) -> !fir.shape<1>
-! CHECK: %[[VAL_20:.*]] = fir.embox %[[VAL_17]](%[[VAL_19]]) typeparams %[[VAL_7]] : (!fir.ptr<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>
-! CHECK: fir.store %[[VAL_20]] to %[[VAL_16]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>
-! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_16]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_ptrEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>)
+! CHECK: fir.do_concurrent.loop {{.*}} local(@[[PTR_LOCALIZER]] %{{.*}}#0 -> %[[LOCAL_ARG:.*]] : {{.*}})
+! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[LOCAL_ARG]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_ptrEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>)
! CHECK: fir.call @_QPtakes_ptr(%[[VAL_21]]#0) proc_attrs<pure> fastmath<contract> : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,?>>>>>) -> ()
! CHECK: }
! CHECK: return
! CHECK: }
! CHECK-LABEL: func.func @_QPtest_default_init(
-! CHECK: fir.do_concurrent.loop
-! CHECK: %[[VAL_26:.*]] = fir.alloca !fir.type<_QFtest_default_initTt{i:i32}> {bindc_name = "a", pinned, uniq_name = "_QFtest_default_initEa"}
-! CHECK: %[[VAL_27:.*]] = fir.embox %[[VAL_26]] : (!fir.ref<!fir.type<_QFtest_default_initTt{i:i32}>>) -> !fir.box<!fir.type<_QFtest_default_initTt{i:i32}>>
-! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_27]] : (!fir.box<!fir.type<_QFtest_default_initTt{i:i32}>>) -> !fir.box<none>
-! CHECK: fir.call @_FortranAInitialize(%[[VAL_30]], {{.*}}
-! CHECK: %[[VAL_33:.*]]:2 = hlfir.declare %[[VAL_26]] {uniq_name = "_QFtest_default_initEa"} : (!fir.ref<!fir.type<_QFtest_default_initTt{i:i32}>>) -> (!fir.ref<!fir.type<_QFtest_default_initTt{i:i32}>>, !fir.ref<!fir.type<_QFtest_default_initTt{i:i32}>>)
+! CHECK: fir.do_concurrent.loop {{.*}} local(@[[DEFAULT_INIT_LOCALIZER]] %{{.*}}#0 -> %[[LOCAL_ARG:.*]] : {{.*}})
+! CHECK: %[[VAL_33:.*]]:2 = hlfir.declare %[[LOCAL_ARG]] {uniq_name = "_QFtest_default_initEa"} : (!fir.ref<!fir.type<_QFtest_default_initTt{i:i32}>>) -> (!fir.ref<!fir.type<_QFtest_default_initTt{i:i32}>>, !fir.ref<!fir.type<_QFtest_default_initTt{i:i32}>>)
! CHECK: }
More information about the flang-commits
mailing list