[flang-commits] [flang] [flang][OpenMP][RFC] Add support for COPYPRIVATE (PR #73128)
Leandro Lupori via flang-commits
flang-commits at lists.llvm.org
Wed Nov 22 06:45:21 PST 2023
https://github.com/luporl created https://github.com/llvm/llvm-project/pull/73128
Add initial handling of COPYPRIVATE clause.
It was implemented using a temporary stack variable that can be
accessed by all threads, a sync variable. A single thread writes the
value of its private variable to this temporary, at the end of the
single region. After the single operation, all threads then read
from the sync variable and write its value to their private copies.
Fixes https://github.com/llvm/llvm-project/issues/63933
>From 610ed00324f499072f3081b99bc55f88e0cc4744 Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Wed, 22 Nov 2023 11:39:19 -0300
Subject: [PATCH] [flang][OpenMP][RFC] Add support for COPYPRIVATE
Add initial handling of COPYPRIVATE clause.
It was implemented using a temporary stack variable that can be
accessed by all threads, a sync variable. A single thread writes the
value of its private variable to this temporary, at the end of the
single region. After the single operation, all threads then read
from the sync variable and write its value to their private copies.
Fixes https://github.com/llvm/llvm-project/issues/63933
---
flang/include/flang/Lower/AbstractConverter.h | 23 +
flang/lib/Lower/Bridge.cpp | 430 +++++++++++-------
flang/lib/Lower/OpenMP.cpp | 75 ++-
flang/lib/Semantics/resolve-directives.cpp | 3 +-
flang/test/Lower/OpenMP/Todo/copyprivate.f90 | 13 -
flang/test/Lower/OpenMP/copyprivate.f90 | 244 ++++++++++
6 files changed, 596 insertions(+), 192 deletions(-)
delete mode 100644 flang/test/Lower/OpenMP/Todo/copyprivate.f90
create mode 100644 flang/test/Lower/OpenMP/copyprivate.f90
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index 980fde881373249..bb182812c54132f 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -51,6 +51,7 @@ class DerivedTypeSpec;
} // namespace semantics
namespace lower {
+struct SymbolBox;
class SymMap;
namespace pft {
struct Variable;
@@ -111,13 +112,35 @@ class AbstractConverter {
virtual bool
createHostAssociateVarClone(const Fortran::semantics::Symbol &sym) = 0;
+ /// For a given symbol which may not be host-associated, create a clone using
+ /// parameters from the symbol or from the host-associated symbol, if any.
+ /// This member function does not insert the clone in the symbol table and
+ /// does not initialize it.
+ virtual Fortran::lower::SymbolBox
+ createVarClone(const Fortran::semantics::Symbol &sym) = 0;
+
+ /// Initialize a previously created clone.
+ virtual void initVarClone(const Fortran::semantics::Symbol &sym,
+ const Fortran::lower::SymbolBox &clone) = 0;
+
virtual void
createHostAssociateVarCloneDealloc(const Fortran::semantics::Symbol &sym) = 0;
+ virtual void createVarCloneDealloc(const Fortran::semantics::Symbol &sym,
+ Fortran::lower::SymbolBox &sb) = 0;
+
virtual void copyHostAssociateVar(
const Fortran::semantics::Symbol &sym,
mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr) = 0;
+ virtual void copyVar(const Fortran::semantics::Symbol &dst,
+ const Fortran::lower::SymbolBox &src,
+ bool needBarrier = false) = 0;
+
+ virtual void copyVar(const Fortran::lower::SymbolBox &dst,
+ const Fortran::semantics::Symbol &src,
+ bool needBarrier = false) = 0;
+
/// For a given symbol, check if it is present in the inner-most
/// level of the symbol map.
virtual bool isPresentShallowLookup(Fortran::semantics::Symbol &sym) = 0;
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 872bf6bc729ecd0..0cb43bb67a2a964 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -53,6 +53,7 @@
#include "flang/Semantics/symbol.h"
#include "flang/Semantics/tools.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Parser/Parser.h"
#include "mlir/Transforms/RegionUtils.h"
@@ -609,125 +610,41 @@ class FirConverter : public Fortran::lower::AbstractConverter {
bool createHostAssociateVarClone(
const Fortran::semantics::Symbol &sym) override final {
- mlir::Location loc = genLocation(sym.name());
- mlir::Type symType = genType(sym);
- const auto *details = sym.detailsIf<Fortran::semantics::HostAssocDetails>();
- assert(details && "No host-association found");
- const Fortran::semantics::Symbol &hsym = details->symbol();
- mlir::Type hSymType = genType(hsym);
- Fortran::lower::SymbolBox hsb = lookupSymbol(hsym);
-
- auto allocate = [&](llvm::ArrayRef<mlir::Value> shape,
- llvm::ArrayRef<mlir::Value> typeParams) -> mlir::Value {
- mlir::Value allocVal = builder->allocateLocal(
- loc,
- Fortran::semantics::IsAllocatableOrObjectPointer(&hsym.GetUltimate())
- ? hSymType
- : symType,
- mangleName(sym), toStringRef(sym.GetUltimate().name()),
- /*pinned=*/true, shape, typeParams,
- sym.GetUltimate().attrs().test(Fortran::semantics::Attr::TARGET));
- return allocVal;
- };
-
- fir::ExtendedValue hexv = symBoxToExtendedValue(hsb);
- fir::ExtendedValue exv = hexv.match(
- [&](const fir::BoxValue &box) -> fir::ExtendedValue {
- const Fortran::semantics::DeclTypeSpec *type = sym.GetType();
- if (type && type->IsPolymorphic())
- TODO(loc, "create polymorphic host associated copy");
- // Create a contiguous temp with the same shape and length as
- // the original variable described by a fir.box.
- llvm::SmallVector<mlir::Value> extents =
- fir::factory::getExtents(loc, *builder, hexv);
- if (box.isDerivedWithLenParameters())
- TODO(loc, "get length parameters from derived type BoxValue");
- if (box.isCharacter()) {
- mlir::Value len = fir::factory::readCharLen(*builder, loc, box);
- mlir::Value temp = allocate(extents, {len});
- return fir::CharArrayBoxValue{temp, len, extents};
- }
- return fir::ArrayBoxValue{allocate(extents, {}), extents};
- },
- [&](const fir::MutableBoxValue &box) -> fir::ExtendedValue {
- // Allocate storage for a pointer/allocatble descriptor.
- // No shape/lengths to be passed to the alloca.
- return fir::MutableBoxValue(allocate({}, {}), {}, {});
- },
- [&](const auto &) -> fir::ExtendedValue {
- mlir::Value temp =
- allocate(fir::factory::getExtents(loc, *builder, hexv),
- fir::factory::getTypeParams(loc, *builder, hexv));
- return fir::substBase(hexv, temp);
- });
-
- // Initialise cloned allocatable
- hexv.match(
- [&](const fir::MutableBoxValue &box) -> void {
- // Do not process pointers
- if (Fortran::semantics::IsPointer(sym.GetUltimate())) {
- return;
- }
- // Allocate storage for a pointer/allocatble descriptor.
- // No shape/lengths to be passed to the alloca.
- const auto new_box = exv.getBoxOf<fir::MutableBoxValue>();
+ assert(sym.detailsIf<Fortran::semantics::HostAssocDetails>() &&
+ "No host-association found");
+ fir::ExtendedValue exv = cloneSymbolValue(sym);
+ fir::ExtendedValue oexv = symBoxToExtendedValue(getOriginalSymbolBox(sym));
+ initClonedValue(sym, exv, oexv);
+ return bindIfNewSymbol(sym, exv);
+ }
- // allocate if allocated
- mlir::Value isAllocated =
- fir::factory::genIsAllocatedOrAssociatedTest(*builder, loc, box);
- auto if_builder = builder->genIfThenElse(loc, isAllocated);
- if_builder.genThen([&]() {
- std::string name = mangleName(sym) + ".alloc";
- if (auto seqTy = symType.dyn_cast<fir::SequenceType>()) {
- fir::ExtendedValue read = fir::factory::genMutableBoxRead(
- *builder, loc, box, /*mayBePolymorphic=*/false);
- if (auto read_arr_box = read.getBoxOf<fir::ArrayBoxValue>()) {
- fir::factory::genInlinedAllocation(
- *builder, loc, *new_box, read_arr_box->getLBounds(),
- read_arr_box->getExtents(),
- /*lenParams=*/std::nullopt, name,
- /*mustBeHeap=*/true);
- } else if (auto read_char_arr_box =
- read.getBoxOf<fir::CharArrayBoxValue>()) {
- fir::factory::genInlinedAllocation(
- *builder, loc, *new_box, read_char_arr_box->getLBounds(),
- read_char_arr_box->getExtents(),
- read_char_arr_box->getLen(), name,
- /*mustBeHeap=*/true);
- } else {
- TODO(loc, "Unhandled allocatable box type");
- }
- } else {
- fir::factory::genInlinedAllocation(
- *builder, loc, *new_box, box.getMutableProperties().lbounds,
- box.getMutableProperties().extents,
- box.nonDeferredLenParams(), name,
- /*mustBeHeap=*/true);
- }
- });
- if_builder.genElse([&]() {
- // nullify box
- auto empty = fir::factory::createUnallocatedBox(
- *builder, loc, new_box->getBoxTy(),
- new_box->nonDeferredLenParams(), {});
- builder->create<fir::StoreOp>(loc, empty, new_box->getAddr());
- });
- if_builder.end();
- },
- [&](const auto &) -> void {
- // Do nothing
- });
+ Fortran::lower::SymbolBox
+ createVarClone(const Fortran::semantics::Symbol &sym) override final {
+ fir::ExtendedValue exv = cloneSymbolValue(sym);
+ Fortran::lower::SymMap symMap;
+ addSymbol(sym, exv, /*forced=*/true, symMap);
+ return symMap.shallowLookupSymbol(sym);
+ }
- return bindIfNewSymbol(sym, exv);
+ void initVarClone(const Fortran::semantics::Symbol &sym,
+ const Fortran::lower::SymbolBox &clone) override final {
+ fir::ExtendedValue exv = symBoxToExtendedValue(clone);
+ fir::ExtendedValue oexv = symBoxToExtendedValue(getOriginalSymbolBox(sym));
+ initClonedValue(sym, exv, oexv);
}
void createHostAssociateVarCloneDealloc(
const Fortran::semantics::Symbol &sym) override final {
- mlir::Location loc = genLocation(sym.name());
Fortran::lower::SymbolBox hsb = lookupSymbol(sym);
+ createVarCloneDealloc(sym, hsb);
+ }
+
+ void createVarCloneDealloc(const Fortran::semantics::Symbol &sym,
+ Fortran::lower::SymbolBox &sb) override final {
+ mlir::Location loc = genLocation(sym.name());
- fir::ExtendedValue hexv = symBoxToExtendedValue(hsb);
- hexv.match(
+ fir::ExtendedValue exv = symBoxToExtendedValue(sb);
+ exv.match(
[&](const fir::MutableBoxValue &new_box) -> void {
// Do not process pointers
if (Fortran::semantics::IsPointer(sym.GetUltimate())) {
@@ -741,6 +658,20 @@ class FirConverter : public Fortran::lower::AbstractConverter {
});
}
+ void copyVar(const Fortran::semantics::Symbol &dst,
+ const Fortran::lower::SymbolBox &src,
+ bool needBarrier = false) override final {
+ Fortran::lower::SymbolBox dst_sb = lookupSymbol(dst);
+ copyVar(dst, dst_sb, src, needBarrier);
+ }
+
+ void copyVar(const Fortran::lower::SymbolBox &dst,
+ const Fortran::semantics::Symbol &src,
+ bool needBarrier = false) override final {
+ Fortran::lower::SymbolBox src_sb = lookupSymbol(src);
+ copyVar(src, dst, src_sb, needBarrier);
+ }
+
void copyHostAssociateVar(
const Fortran::semantics::Symbol &sym,
mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr) override final {
@@ -775,64 +706,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
rhs_sb = &hsb;
}
- mlir::Location loc = genLocation(sym.name());
-
- if (lowerToHighLevelFIR()) {
- hlfir::Entity lhs{lhs_sb->getAddr()};
- hlfir::Entity rhs{rhs_sb->getAddr()};
- // Temporary_lhs is set to true in hlfir.assign below to avoid user
- // assignment to be used and finalization to be called on the LHS.
- // This may or may not be correct but mimics the current behaviour
- // without HLFIR.
- auto copyData = [&](hlfir::Entity l, hlfir::Entity r) {
- // Dereference RHS and load it if trivial scalar.
- r = hlfir::loadTrivialScalar(loc, *builder, r);
- builder->create<hlfir::AssignOp>(
- loc, r, l,
- /*isWholeAllocatableAssignment=*/false,
- /*keepLhsLengthInAllocatableAssignment=*/false,
- /*temporary_lhs=*/true);
- };
- if (lhs.isAllocatable()) {
- // Deep copy allocatable if it is allocated.
- // Note that when allocated, the RHS is already allocated with the LHS
- // shape for copy on entry in createHostAssociateVarClone.
- // For lastprivate, this assumes that the RHS was not reallocated in
- // the OpenMP region.
- lhs = hlfir::derefPointersAndAllocatables(loc, *builder, lhs);
- mlir::Value addr = hlfir::genVariableRawAddress(loc, *builder, lhs);
- mlir::Value isAllocated = builder->genIsNotNullAddr(loc, addr);
- builder->genIfThen(loc, isAllocated)
- .genThen([&]() {
- // Copy the DATA, not the descriptors.
- copyData(lhs, rhs);
- })
- .end();
- } else if (lhs.isPointer()) {
- // Set LHS target to the target of RHS (do not copy the RHS
- // target data into the LHS target storage).
- auto loadVal = builder->create<fir::LoadOp>(loc, rhs);
- builder->create<fir::StoreOp>(loc, loadVal, lhs);
- } else {
- // Non ALLOCATABLE/POINTER variable. Simple DATA copy.
- copyData(lhs, rhs);
- }
- } else {
- fir::ExtendedValue lhs = symBoxToExtendedValue(*lhs_sb);
- fir::ExtendedValue rhs = symBoxToExtendedValue(*rhs_sb);
- mlir::Type symType = genType(sym);
- if (auto seqTy = symType.dyn_cast<fir::SequenceType>()) {
- Fortran::lower::StatementContext stmtCtx;
- Fortran::lower::createSomeArrayAssignment(*this, lhs, rhs, localSymbols,
- stmtCtx);
- stmtCtx.finalizeAndReset();
- } else if (lhs.getBoxOf<fir::CharBoxValue>()) {
- fir::factory::CharacterExprHelper{*builder, loc}.createAssign(lhs, rhs);
- } else {
- auto loadVal = builder->create<fir::LoadOp>(loc, fir::getBase(rhs));
- builder->create<fir::StoreOp>(loc, loadVal, fir::getBase(lhs));
- }
- }
+ copyVar(sym, *lhs_sb, *rhs_sb);
if (copyAssignIP && copyAssignIP->isSet() &&
sym.test(Fortran::semantics::Symbol::Flag::OmpLastPrivate)) {
@@ -1075,16 +949,226 @@ class FirConverter : public Fortran::lower::AbstractConverter {
fir::ExtendedValue val, bool forced = false) {
if (!forced && lookupSymbol(sym))
return false;
+ return addSymbol(sym, val, forced, localSymbols);
+ }
+
+ /// Add the symbol to \p symMap.
+ /// Always returns `true`.
+ bool addSymbol(const Fortran::semantics::SymbolRef sym,
+ fir::ExtendedValue val, bool forced,
+ Fortran::lower::SymMap &symMap) {
if (lowerToHighLevelFIR()) {
- Fortran::lower::genDeclareSymbol(*this, localSymbols, sym, val,
- fir::FortranVariableFlagsEnum::None,
- forced);
+ Fortran::lower::genDeclareSymbol(
+ *this, symMap, sym, val, fir::FortranVariableFlagsEnum::None, forced);
} else {
- localSymbols.addSymbol(sym, val, forced);
+ symMap.addSymbol(sym, val, forced);
}
return true;
}
+ void initClonedValue(const Fortran::semantics::Symbol &sym,
+ const fir::ExtendedValue &clone,
+ const fir::ExtendedValue &orig) {
+ mlir::Location loc = genLocation(sym.name());
+ mlir::Type symType = genType(sym);
+ // The type of a non host associated symbol may be wrapped inside a box.
+ if (!sym.detailsIf<Fortran::semantics::HostAssocDetails>()) {
+ if (mlir::Type seqType = fir::unwrapUntilSeqType(symType))
+ symType = seqType;
+ }
+
+ // Initialise cloned allocatable
+ orig.match(
+ [&](const fir::MutableBoxValue &box) -> void {
+ // Do not process pointers
+ if (Fortran::semantics::IsPointer(sym.GetUltimate())) {
+ return;
+ }
+ // Allocate storage for a pointer/allocatble descriptor.
+ // No shape/lengths to be passed to the alloca.
+ const auto new_box = clone.getBoxOf<fir::MutableBoxValue>();
+
+ // allocate if allocated
+ mlir::Value isAllocated =
+ fir::factory::genIsAllocatedOrAssociatedTest(*builder, loc, box);
+ auto if_builder = builder->genIfThenElse(loc, isAllocated);
+ if_builder.genThen([&]() {
+ std::string name = mangleName(sym) + ".alloc";
+ if (auto seqTy = symType.dyn_cast<fir::SequenceType>()) {
+ fir::ExtendedValue read = fir::factory::genMutableBoxRead(
+ *builder, loc, box, /*mayBePolymorphic=*/false);
+ if (auto read_arr_box = read.getBoxOf<fir::ArrayBoxValue>()) {
+ fir::factory::genInlinedAllocation(
+ *builder, loc, *new_box, read_arr_box->getLBounds(),
+ read_arr_box->getExtents(),
+ /*lenParams=*/std::nullopt, name,
+ /*mustBeHeap=*/true);
+ } else if (auto read_char_arr_box =
+ read.getBoxOf<fir::CharArrayBoxValue>()) {
+ fir::factory::genInlinedAllocation(
+ *builder, loc, *new_box, read_char_arr_box->getLBounds(),
+ read_char_arr_box->getExtents(),
+ read_char_arr_box->getLen(), name,
+ /*mustBeHeap=*/true);
+ } else {
+ TODO(loc, "Unhandled allocatable box type");
+ }
+ } else {
+ fir::factory::genInlinedAllocation(
+ *builder, loc, *new_box, box.getMutableProperties().lbounds,
+ box.getMutableProperties().extents,
+ box.nonDeferredLenParams(), name,
+ /*mustBeHeap=*/true);
+ }
+ });
+ if_builder.genElse([&]() {
+ // nullify box
+ auto empty = fir::factory::createUnallocatedBox(
+ *builder, loc, new_box->getBoxTy(),
+ new_box->nonDeferredLenParams(), {});
+ builder->create<fir::StoreOp>(loc, empty, new_box->getAddr());
+ });
+ if_builder.end();
+ },
+ [&](const auto &) -> void {
+ // Do nothing
+ });
+ }
+
+ Fortran::lower::SymbolBox
+ getOriginalSymbolBox(const Fortran::semantics::Symbol &sym) {
+ const auto *details = sym.detailsIf<Fortran::semantics::HostAssocDetails>();
+ if (details) {
+ const Fortran::semantics::Symbol &hsym = details->symbol();
+ return lookupSymbol(hsym);
+ }
+ return lookupSymbol(sym);
+ }
+
+ fir::ExtendedValue cloneSymbolValue(const Fortran::semantics::Symbol &sym) {
+ mlir::Location loc = genLocation(sym.name());
+ mlir::Type symType = genType(sym);
+ mlir::Type allocType = symType;
+ const auto *details = sym.detailsIf<Fortran::semantics::HostAssocDetails>();
+ if (details) {
+ const Fortran::semantics::Symbol &hsym = details->symbol();
+ if (Fortran::semantics::IsAllocatableOrObjectPointer(&hsym.GetUltimate()))
+ allocType = genType(hsym);
+ }
+ Fortran::lower::SymbolBox sb = getOriginalSymbolBox(sym);
+
+ auto allocate = [&](llvm::ArrayRef<mlir::Value> shape,
+ llvm::ArrayRef<mlir::Value> typeParams) -> mlir::Value {
+ mlir::Value allocVal = builder->allocateLocal(
+ loc, allocType, mangleName(sym),
+ toStringRef(sym.GetUltimate().name()),
+ /*pinned=*/true, shape, typeParams,
+ sym.GetUltimate().attrs().test(Fortran::semantics::Attr::TARGET));
+ return allocVal;
+ };
+
+ fir::ExtendedValue oexv = symBoxToExtendedValue(sb);
+ fir::ExtendedValue exv = oexv.match(
+ [&](const fir::BoxValue &box) -> fir::ExtendedValue {
+ const Fortran::semantics::DeclTypeSpec *type = sym.GetType();
+ if (type && type->IsPolymorphic())
+ TODO(loc, "create polymorphic copy");
+ // Create a contiguous temp with the same shape and length as
+ // the original variable described by a fir.box.
+ llvm::SmallVector<mlir::Value> extents =
+ fir::factory::getExtents(loc, *builder, oexv);
+ if (box.isDerivedWithLenParameters())
+ TODO(loc, "get length parameters from derived type BoxValue");
+ if (box.isCharacter()) {
+ mlir::Value len = fir::factory::readCharLen(*builder, loc, box);
+ mlir::Value temp = allocate(extents, {len});
+ return fir::CharArrayBoxValue{temp, len, extents};
+ }
+ return fir::ArrayBoxValue{allocate(extents, {}), extents};
+ },
+ [&](const fir::MutableBoxValue &box) -> fir::ExtendedValue {
+ // Allocate storage for a pointer/allocatble descriptor.
+ // No shape/lengths to be passed to the alloca.
+ return fir::MutableBoxValue(allocate({}, {}), {}, {});
+ },
+ [&](const auto &) -> fir::ExtendedValue {
+ mlir::Value temp =
+ allocate(fir::factory::getExtents(loc, *builder, oexv),
+ fir::factory::getTypeParams(loc, *builder, oexv));
+ return fir::substBase(oexv, temp);
+ });
+
+ return exv;
+ }
+
+ void copyVar(const Fortran::semantics::Symbol &sym,
+ const Fortran::lower::SymbolBox &lhs_sb,
+ const Fortran::lower::SymbolBox &rhs_sb,
+ bool needBarrier = false) {
+ mlir::Location loc = genLocation(sym.name());
+
+ if (lowerToHighLevelFIR()) {
+ hlfir::Entity lhs{lhs_sb.getAddr()};
+ hlfir::Entity rhs{rhs_sb.getAddr()};
+ // Temporary_lhs is set to true in hlfir.assign below to avoid user
+ // assignment to be used and finalization to be called on the LHS.
+ // This may or may not be correct but mimics the current behaviour
+ // without HLFIR.
+ auto copyData = [&](hlfir::Entity l, hlfir::Entity r) {
+ // Dereference RHS and load it if trivial scalar.
+ hlfir::Entity r2 = hlfir::loadTrivialScalar(loc, *builder, r);
+ if (needBarrier && r2 != r)
+ builder->create<mlir::omp::BarrierOp>(loc);
+ builder->create<hlfir::AssignOp>(
+ loc, r2, l,
+ /*isWholeAllocatableAssignment=*/false,
+ /*keepLhsLengthInAllocatableAssignment=*/false,
+ /*temporary_lhs=*/true);
+ if (needBarrier)
+ builder->create<mlir::omp::BarrierOp>(loc);
+ };
+ if (lhs.isAllocatable()) {
+ // Deep copy allocatable if it is allocated.
+ // Note that when allocated, the RHS is already allocated with the LHS
+ // shape for copy on entry in createHostAssociateVarClone.
+ // For lastprivate, this assumes that the RHS was not reallocated in
+ // the OpenMP region.
+ lhs = hlfir::derefPointersAndAllocatables(loc, *builder, lhs);
+ mlir::Value addr = hlfir::genVariableRawAddress(loc, *builder, lhs);
+ mlir::Value isAllocated = builder->genIsNotNullAddr(loc, addr);
+ builder->genIfThen(loc, isAllocated)
+ .genThen([&]() {
+ // Copy the DATA, not the descriptors.
+ copyData(lhs, rhs);
+ })
+ .end();
+ } else if (lhs.isPointer()) {
+ // Set LHS target to the target of RHS (do not copy the RHS
+ // target data into the LHS target storage).
+ auto loadVal = builder->create<fir::LoadOp>(loc, rhs);
+ builder->create<fir::StoreOp>(loc, loadVal, lhs);
+ } else {
+ // Non ALLOCATABLE/POINTER variable. Simple DATA copy.
+ copyData(lhs, rhs);
+ }
+ } else {
+ fir::ExtendedValue lhs = symBoxToExtendedValue(lhs_sb);
+ fir::ExtendedValue rhs = symBoxToExtendedValue(rhs_sb);
+ mlir::Type symType = genType(sym);
+ if (auto seqTy = symType.dyn_cast<fir::SequenceType>()) {
+ Fortran::lower::StatementContext stmtCtx;
+ Fortran::lower::createSomeArrayAssignment(*this, lhs, rhs, localSymbols,
+ stmtCtx);
+ stmtCtx.finalizeAndReset();
+ } else if (lhs.getBoxOf<fir::CharBoxValue>()) {
+ fir::factory::CharacterExprHelper{*builder, loc}.createAssign(lhs, rhs);
+ } else {
+ auto loadVal = builder->create<fir::LoadOp>(loc, fir::getBase(rhs));
+ builder->create<fir::StoreOp>(loc, loadVal, fir::getBase(lhs));
+ }
+ }
+ }
+
/// Map a block argument to a result or dummy symbol. This is not the
/// definitive mapping. The specification expression have not been lowered
/// yet. The final mapping will be done using this pre-mapping in
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 517812305358c77..aea0b84898ace1e 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -18,6 +18,7 @@
#include "flang/Lower/ConvertVariable.h"
#include "flang/Lower/PFTBuilder.h"
#include "flang/Lower/StatementContext.h"
+#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
@@ -561,6 +562,7 @@ class ClauseProcessor {
processAllocate(llvm::SmallVectorImpl<mlir::Value> &allocatorOperands,
llvm::SmallVectorImpl<mlir::Value> &allocateOperands) const;
bool processCopyin() const;
+ bool processCopyPrivate(mlir::Operation *singleOp) const;
bool processDepend(llvm::SmallVectorImpl<mlir::Attribute> &dependTypeOperands,
llvm::SmallVectorImpl<mlir::Value> &dependOperands) const;
bool
@@ -1631,6 +1633,67 @@ bool ClauseProcessor::processCopyin() const {
return hasCopyin;
}
+bool ClauseProcessor::processCopyPrivate(mlir::Operation *singleOp) const {
+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+ assert(singleOp->getNumRegions() == 1 && !singleOp->getRegion(0).empty());
+
+ auto copyPrivateVar = [&](Fortran::semantics::Symbol *sym) {
+ // If we are inside a parallel operation, then create a clone before it,
+ // else insert the clone right before the single operation.
+ mlir::Operation *insOp = singleOp->getParentOp();
+ while (insOp) {
+ if (mlir::dyn_cast<mlir::omp::ParallelOp>(insOp))
+ break;
+ insOp = insOp->getParentOp();
+ }
+ if (!insOp)
+ insOp = singleOp;
+ firOpBuilder.setInsertionPoint(insOp);
+ Fortran::lower::SymbolBox clone = converter.createVarClone(*sym);
+ // Initialize clone, at the end of the single region.
+ mlir::Operation *lastOp = singleOp->getRegion(0).back().getTerminator();
+ firOpBuilder.setInsertionPoint(lastOp);
+ converter.initVarClone(*sym, clone);
+ converter.copyVar(clone, *sym);
+ // Reload the private variable from the clone, after the single operation.
+ // For ALLOCATABLE variables, the copy will happen inside an if-allocated
+ // block and this is where the barrier will be inserted. This can cause
+ // a hang if some threads have allocated the variable and others not.
+ // Luckly, OpenMP spec [OMP 5.2, 5.7.2] states that any list item with the
+ // ALLOCATABLE attribute must have the allocation status of allocated when
+ // the intrinsic assignment is performed.
+ firOpBuilder.setInsertionPointAfter(singleOp);
+ converter.copyVar(*sym, clone, /*needBarrier=*/true);
+ converter.createVarCloneDealloc(*sym, clone);
+ };
+
+ bool hasCopyPrivate = findRepeatableClause<ClauseTy::Copyprivate>(
+ [&](const ClauseTy::Copyprivate *copyPrivateClause,
+ const Fortran::parser::CharBlock &) {
+ const Fortran::parser::OmpObjectList &ompObjectList =
+ copyPrivateClause->v;
+ for (const Fortran::parser::OmpObject &ompObject : ompObjectList.v) {
+ Fortran::semantics::Symbol *sym = getOmpObjectSymbol(ompObject);
+ if (const auto *commonDetails =
+ sym->detailsIf<Fortran::semantics::CommonBlockDetails>()) {
+ for (const auto &mem : commonDetails->objects())
+ copyPrivateVar(&*mem);
+ break;
+ }
+ copyPrivateVar(sym);
+ }
+ });
+
+ if (hasCopyPrivate) {
+ // Set insertion point to start of region.
+ // At least a terminator should be present.
+ auto &ops = singleOp->getRegion(0).front().getOperations();
+ assert(!ops.empty() && "Unexpected empty region");
+ firOpBuilder.setInsertionPoint(&ops.front());
+ }
+ return hasCopyPrivate;
+}
+
bool ClauseProcessor::processDepend(
llvm::SmallVectorImpl<mlir::Attribute> &dependTypeOperands,
llvm::SmallVectorImpl<mlir::Value> &dependOperands) const {
@@ -2339,14 +2402,15 @@ genSingleOp(Fortran::lower::AbstractConverter &converter,
ClauseProcessor cp(converter, beginClauseList);
cp.processAllocate(allocatorOperands, allocateOperands);
- cp.processTODO<Fortran::parser::OmpClause::Copyprivate>(
- currentLocation, llvm::omp::Directive::OMPD_single);
- ClauseProcessor(converter, endClauseList).processNowait(nowaitAttr);
+ ClauseProcessor ecp(converter, endClauseList);
+ ecp.processNowait(nowaitAttr);
- return genOpWithBody<mlir::omp::SingleOp>(
+ auto singleOp = genOpWithBody<mlir::omp::SingleOp>(
converter, eval, currentLocation, /*outerCombined=*/false,
&beginClauseList, allocateOperands, allocatorOperands, nowaitAttr);
+ ecp.processCopyPrivate(singleOp);
+ return singleOp;
}
static mlir::omp::TaskOp
@@ -3123,7 +3187,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
for (const auto &clause : endClauseList.v) {
mlir::Location clauseLocation = converter.genLocation(clause.source);
- if (!std::get_if<Fortran::parser::OmpClause::Nowait>(&clause.u))
+ if (!std::get_if<Fortran::parser::OmpClause::Nowait>(&clause.u) &&
+ !std::get_if<Fortran::parser::OmpClause::Copyprivate>(&clause.u))
TODO(clauseLocation, "OpenMP Block construct clause");
}
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index bbb105e3516da18..314af74fd4bc84f 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -2368,7 +2368,8 @@ void OmpAttributeVisitor::CheckDataCopyingClause(
// either 'private' or 'threadprivate' in enclosing context.
if (!checkSymbol->test(Symbol::Flag::OmpThreadprivate) &&
!(HasSymbolInEnclosingScope(symbol, currScope()) &&
- symbol.test(Symbol::Flag::OmpPrivate))) {
+ (symbol.test(Symbol::Flag::OmpPrivate) ||
+ symbol.test(Symbol::Flag::OmpFirstPrivate)))) {
context_.Say(name.source,
"COPYPRIVATE variable '%s' is not PRIVATE or THREADPRIVATE in "
"outer context"_err_en_US,
diff --git a/flang/test/Lower/OpenMP/Todo/copyprivate.f90 b/flang/test/Lower/OpenMP/Todo/copyprivate.f90
deleted file mode 100644
index 0d871427ce60ff4..000000000000000
--- a/flang/test/Lower/OpenMP/Todo/copyprivate.f90
+++ /dev/null
@@ -1,13 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: OpenMP Block construct clause
-subroutine sb
- integer, save :: a
- !$omp threadprivate(a)
- !$omp parallel
- !$omp single
- a = 3
- !$omp end single copyprivate(a)
- !$omp end parallel
-end subroutine
diff --git a/flang/test/Lower/OpenMP/copyprivate.f90 b/flang/test/Lower/OpenMP/copyprivate.f90
new file mode 100644
index 000000000000000..bac71a794246455
--- /dev/null
+++ b/flang/test/Lower/OpenMP/copyprivate.f90
@@ -0,0 +1,244 @@
+! Test COPYPRIVATE.
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+!CHECK-LABEL: func @_QPtest_tp
+!CHECK: %[[SYNC_VAR_ADDR:.*]] = fir.alloca f32 {bindc_name = "a", pinned, uniq_name = "_QFtest_tpEa"}
+!CHECK: %[[SYNC_VAR:.*]]:2 = hlfir.declare %[[SYNC_VAR_ADDR]] {uniq_name = "_QFtest_tpEa"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK: omp.single {
+!CHECK: hlfir.assign %{{.*}} to %[[SYNC_VAR]]#0 temporary_lhs : f32, !fir.ref<f32>
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
+!CHECK-NEXT: %[[TMP:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<f32>
+!CHECK-NEXT: omp.barrier
+!CHECK-NEXT: hlfir.assign %[[TMP]] to %{{.*}}#1 temporary_lhs : f32, !fir.ref<f32>
+!CHECK-NEXT: omp.barrier
+subroutine test_tp()
+ real(4), save :: a = 2.5
+ !$omp threadprivate(a)
+
+ !$omp single
+ a = 1.5
+ !$omp end single copyprivate(a)
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_priv
+!CHECK: %[[ORIG_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_privEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[SYNC_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_privEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.parallel {
+!CHECK: omp.single {
+!CHECK: hlfir.assign %{{.*}} to %[[SYNC_VAR]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
+!CHECK-NEXT: %[[TMP:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<i32>
+!CHECK-NEXT: omp.barrier
+!CHECK-NEXT: hlfir.assign %[[TMP]] to %{{.*}}#1 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT: omp.barrier
+!CHECK: }
+subroutine test_priv()
+ integer :: i
+
+ i = 11
+ !$omp parallel firstprivate(i)
+ !$omp single
+ i = i + 1
+ !$omp end single copyprivate(i)
+ !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_array
+!CHECK: %[[ORIG_VAR:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+!CHECK: %[[SYNC_VAR:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+!CHECK: omp.parallel {
+!CHECK: omp.single {
+!CHECK: hlfir.assign %{{.*}}#1 to %[[SYNC_VAR]]#0 temporary_lhs : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
+!CHECK-NEXT: hlfir.assign %[[SYNC_VAR]]#0 to %{{.*}}#1 temporary_lhs : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
+!CHECK-NEXT: omp.barrier
+!CHECK: }
+subroutine test_array()
+ integer :: a(10), i
+
+ a = -1
+ !$omp parallel firstprivate(a)
+ !$omp single
+ do i = 1, 5
+ a(i) = i * 10
+ end do
+ !$omp end single copyprivate(a)
+ !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_type
+!CHECK: %[[ORIG_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_typeEt"} : (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>) -> (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>)
+!CHECK: %[[SYNC_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_typeEt"} : (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>) -> (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>)
+!CHECK: omp.parallel {
+!CHECK: omp.single {
+!CHECK: hlfir.assign %{{.*}}#1 to %[[SYNC_VAR]]#0 temporary_lhs : !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
+!CHECK-NEXT: hlfir.assign %[[SYNC_VAR]]#0 to %{{.*}}#1 temporary_lhs : !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>
+!CHECK-NEXT: omp.barrier
+!CHECK: }
+subroutine test_type()
+ type ty
+ integer :: i
+ real :: r
+ integer, dimension(10) :: a
+ end type
+
+ integer :: i
+ type(ty) :: t
+
+ t%i = -1
+ t%r = -1.5
+ t%a = -1
+ !$omp parallel firstprivate(t)
+ !$omp single
+ t%i = 42
+ t%r = 3.14
+ do i = 1, 5
+ t%a(i) = i * 10
+ end do
+ !$omp end single copyprivate(t)
+ !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_multi
+!CHECK: %[[I_SYNC_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFtest_multiEi"}
+!CHECK: %[[I_SYNC:.*]]:2 = hlfir.declare %[[I_SYNC_ADDR]] {uniq_name = "_QFtest_multiEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[J_SYNC_ADDR:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFtest_multiEj"}
+!CHECK: %[[J_SYNC:.*]]:2 = hlfir.declare %[[J_SYNC_ADDR]] {uniq_name = "_QFtest_multiEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[K_SYNC_ADDR:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFtest_multiEk"}
+!CHECK: %[[K_SYNC:.*]]:2 = hlfir.declare %[[K_SYNC_ADDR]] {uniq_name = "_QFtest_multiEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: omp.parallel {
+!CHECK: omp.single {
+!CHECK: %[[I:.*]] = fir.load %{{.*}}#1 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[I]] to %[[I_SYNC]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[J:.*]] = fir.load %{{.*}}#1 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[J]] to %[[J_SYNC]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK: %[[K:.*]] = fir.load %{{.*}}#1 : !fir.ref<i32>
+!CHECK: hlfir.assign %[[K]] to %[[K_SYNC]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
+!CHECK-NEXT: %[[K:.*]] = fir.load %[[K_SYNC]]#0 : !fir.ref<i32>
+!CHECK-NEXT: omp.barrier
+!CHECK-NEXT: hlfir.assign %[[K]] to %{{.*}}#1 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT: omp.barrier
+!CHECK-NEXT: %[[J:.*]] = fir.load %[[J_SYNC]]#0 : !fir.ref<i32>
+!CHECK-NEXT: omp.barrier
+!CHECK-NEXT: hlfir.assign %[[J]] to %{{.*}}#1 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT: omp.barrier
+!CHECK-NEXT: %[[I:.*]] = fir.load %[[I_SYNC]]#0 : !fir.ref<i32>
+!CHECK-NEXT: omp.barrier
+!CHECK-NEXT: hlfir.assign %[[I]] to %{{.*}}#1 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT: omp.barrier
+!CHECK: }
+subroutine test_multi()
+ integer, save :: i, j, k
+ !$omp threadprivate(i, j, k)
+
+ i = 11
+ j = 12
+ k = 13
+ !$omp parallel
+ !$omp single
+ i = 21
+ j = 22
+ k = 23
+ !$omp end single copyprivate(i, j, k)
+ !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_alloc
+!CHECK: %[[SYNC_ADDR:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "a", pinned, uniq_name = "_QFtest_allocEa"}
+!CHECK: %[[SYNC_VAR:.*]]:2 = hlfir.declare %[[SYNC_ADDR]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_allocEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+!CHECK-NEXT: omp.parallel {
+!CHECK: omp.single {
+!CHECK: fir.if %{{.*}} {
+!CHECK: %[[TMP0:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {fir.must_be_heap = true, uniq_name = "_QFtest_allocEa.alloc"}
+!CHECK: %[[TMP1:.*]] = fir.embox %[[TMP0]](%{{.*}}) : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK: fir.store %[[TMP1]] to %[[SYNC_VAR]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK: } else {
+!CHECK: %[[TMP2:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+!CHECK: %[[TMP3:.*]] = fir.embox %[[TMP2]](%{{.*}}) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK: fir.store %[[TMP3]] to %[[SYNC_VAR]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK: }
+!CHECK: %[[TMP4:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK: fir.if %{{.*}} {
+!CHECK: hlfir.assign %{{.*}} to %[[TMP4]] temporary_lhs : !fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK: }
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
+!CHECK: fir.if %{{.*}} {
+!CHECK: %[[TMP5:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK-NEXT: omp.barrier
+!CHECK-NEXT: hlfir.assign %[[TMP5]] to %{{.*}} temporary_lhs : !fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK-NEXT: omp.barrier
+!CHECK: }
+!CHECK: fir.if %{{.*}} {
+!CHECK: %[[TMP6:.*]] = fir.load %[[SYNC_VAR]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK: %[[TMP7:.*]] = fir.box_addr %[[TMP6]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+!CHECK: fir.freemem %[[TMP7]] : !fir.heap<!fir.array<?xi32>>
+!CHECK: }
+subroutine test_alloc()
+ integer, allocatable :: a(:)
+ integer :: i
+
+ allocate(a(10))
+ a = -1
+ !$omp parallel firstprivate(a)
+ !$omp single
+ do i = 1, 5
+ a(i) = i * 10
+ end do
+ !$omp end single copyprivate(a)
+ !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_alloc_tp
+!CHECK: %[[SYNC_ADDR:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "a", pinned, uniq_name = "_QFtest_alloc_tpEa"}
+!CHECK: %[[SYNC_VAR:.*]]:2 = hlfir.declare %[[SYNC_ADDR]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_alloc_tpEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+!CHECK-NEXT: omp.parallel {
+!CHECK: omp.single {
+!CHECK: fir.if %{{.*}} {
+!CHECK: %[[TMP0:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {fir.must_be_heap = true, uniq_name = "_QFtest_alloc_tpEa.alloc"}
+!CHECK: %[[TMP1:.*]] = fir.embox %[[TMP0]](%{{.*}}) : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK: fir.store %[[TMP1]] to %[[SYNC_VAR]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK: } else {
+!CHECK: %[[TMP2:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+!CHECK: %[[TMP3:.*]] = fir.embox %[[TMP2]](%{{.*}}) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK: fir.store %[[TMP3]] to %[[SYNC_VAR]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK: }
+!CHECK: %[[TMP4:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK: fir.if %{{.*}} {
+!CHECK: hlfir.assign %{{.*}} to %[[TMP4]] temporary_lhs : !fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK: }
+!CHECK-NEXT: omp.terminator
+!CHECK-NEXT: }
+!CHECK: fir.if %{{.*}} {
+!CHECK: %[[TMP5:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK-NEXT: omp.barrier
+!CHECK-NEXT: hlfir.assign %[[TMP5]] to %{{.*}} temporary_lhs : !fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK-NEXT: omp.barrier
+!CHECK: }
+!CHECK: fir.if %{{.*}} {
+!CHECK: %[[TMP6:.*]] = fir.load %[[SYNC_VAR]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK: %[[TMP7:.*]] = fir.box_addr %[[TMP6]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+!CHECK: fir.freemem %[[TMP7]] : !fir.heap<!fir.array<?xi32>>
+!CHECK: }
+subroutine test_alloc_tp()
+ integer, save, allocatable :: a(:)
+ !$omp threadprivate(a)
+ integer :: i
+
+ !$omp parallel
+ allocate(a(10))
+ a = -1
+ !$omp single
+ do i = 1, 5
+ a(i) = i * 10
+ end do
+ !$omp end single copyprivate(a)
+ !$omp end parallel
+end subroutine
More information about the flang-commits
mailing list