[flang-commits] [flang] [flang][OpenMP][RFC] Add support for COPYPRIVATE (PR #73128)

Leandro Lupori via flang-commits flang-commits at lists.llvm.org
Wed Dec 20 08:59:39 PST 2023


https://github.com/luporl updated https://github.com/llvm/llvm-project/pull/73128

>From c4fccf53f093cc2acd19c66b2435f6bbb35c2d27 Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Wed, 22 Nov 2023 11:39:19 -0300
Subject: [PATCH] [flang][OpenMP][RFC] Add support for COPYPRIVATE

Add initial handling of COPYPRIVATE clause.

It was implemented using a temporary stack variable that can be
accessed by all threads, a sync variable. A single thread writes the
value of its private variable to this temporary, at the end of the
single region. After the single operation, all threads then read
from the sync variable and write its value to their private copies.

Fixes https://github.com/llvm/llvm-project/issues/63933
---
 flang/include/flang/Lower/AbstractConverter.h |  23 +
 flang/lib/Lower/Bridge.cpp                    | 430 +++++++++++-------
 flang/lib/Lower/OpenMP.cpp                    |  75 ++-
 flang/lib/Semantics/resolve-directives.cpp    |   3 +-
 flang/test/Lower/OpenMP/Todo/copyprivate.f90  |  13 -
 flang/test/Lower/OpenMP/copyprivate.f90       | 244 ++++++++++
 6 files changed, 596 insertions(+), 192 deletions(-)
 delete mode 100644 flang/test/Lower/OpenMP/Todo/copyprivate.f90
 create mode 100644 flang/test/Lower/OpenMP/copyprivate.f90

diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index b91303387f3d71..016fbe8235e716 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -51,6 +51,7 @@ class DerivedTypeSpec;
 } // namespace semantics
 
 namespace lower {
+struct SymbolBox;
 class SymMap;
 namespace pft {
 struct Variable;
@@ -111,13 +112,35 @@ class AbstractConverter {
   virtual bool
   createHostAssociateVarClone(const Fortran::semantics::Symbol &sym) = 0;
 
+  /// For a given symbol which may not be host-associated, create a clone using
+  /// parameters from the symbol or from the host-associated symbol, if any.
+  /// This member function does not insert the clone in the symbol table and
+  /// does not initialize it.
+  virtual Fortran::lower::SymbolBox
+  createVarClone(const Fortran::semantics::Symbol &sym) = 0;
+
+  /// Initialize a previously created clone.
+  virtual void initVarClone(const Fortran::semantics::Symbol &sym,
+                            const Fortran::lower::SymbolBox &clone) = 0;
+
   virtual void
   createHostAssociateVarCloneDealloc(const Fortran::semantics::Symbol &sym) = 0;
 
+  virtual void createVarCloneDealloc(const Fortran::semantics::Symbol &sym,
+                                     Fortran::lower::SymbolBox &sb) = 0;
+
   virtual void copyHostAssociateVar(
       const Fortran::semantics::Symbol &sym,
       mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr) = 0;
 
+  virtual void copyVar(const Fortran::semantics::Symbol &dst,
+                       const Fortran::lower::SymbolBox &src,
+                       bool needBarrier = false) = 0;
+
+  virtual void copyVar(const Fortran::lower::SymbolBox &dst,
+                       const Fortran::semantics::Symbol &src,
+                       bool needBarrier = false) = 0;
+
   /// For a given symbol, check if it is present in the inner-most
   /// level of the symbol map.
   virtual bool isPresentShallowLookup(Fortran::semantics::Symbol &sym) = 0;
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 6ca910d2696742..69d1d79508e1d1 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -54,6 +54,7 @@
 #include "flang/Semantics/symbol.h"
 #include "flang/Semantics/tools.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Parser/Parser.h"
 #include "mlir/Transforms/RegionUtils.h"
@@ -608,125 +609,41 @@ class FirConverter : public Fortran::lower::AbstractConverter {
 
   bool createHostAssociateVarClone(
       const Fortran::semantics::Symbol &sym) override final {
-    mlir::Location loc = genLocation(sym.name());
-    mlir::Type symType = genType(sym);
-    const auto *details = sym.detailsIf<Fortran::semantics::HostAssocDetails>();
-    assert(details && "No host-association found");
-    const Fortran::semantics::Symbol &hsym = details->symbol();
-    mlir::Type hSymType = genType(hsym);
-    Fortran::lower::SymbolBox hsb = lookupSymbol(hsym);
-
-    auto allocate = [&](llvm::ArrayRef<mlir::Value> shape,
-                        llvm::ArrayRef<mlir::Value> typeParams) -> mlir::Value {
-      mlir::Value allocVal = builder->allocateLocal(
-          loc,
-          Fortran::semantics::IsAllocatableOrObjectPointer(&hsym.GetUltimate())
-              ? hSymType
-              : symType,
-          mangleName(sym), toStringRef(sym.GetUltimate().name()),
-          /*pinned=*/true, shape, typeParams,
-          sym.GetUltimate().attrs().test(Fortran::semantics::Attr::TARGET));
-      return allocVal;
-    };
-
-    fir::ExtendedValue hexv = symBoxToExtendedValue(hsb);
-    fir::ExtendedValue exv = hexv.match(
-        [&](const fir::BoxValue &box) -> fir::ExtendedValue {
-          const Fortran::semantics::DeclTypeSpec *type = sym.GetType();
-          if (type && type->IsPolymorphic())
-            TODO(loc, "create polymorphic host associated copy");
-          // Create a contiguous temp with the same shape and length as
-          // the original variable described by a fir.box.
-          llvm::SmallVector<mlir::Value> extents =
-              fir::factory::getExtents(loc, *builder, hexv);
-          if (box.isDerivedWithLenParameters())
-            TODO(loc, "get length parameters from derived type BoxValue");
-          if (box.isCharacter()) {
-            mlir::Value len = fir::factory::readCharLen(*builder, loc, box);
-            mlir::Value temp = allocate(extents, {len});
-            return fir::CharArrayBoxValue{temp, len, extents};
-          }
-          return fir::ArrayBoxValue{allocate(extents, {}), extents};
-        },
-        [&](const fir::MutableBoxValue &box) -> fir::ExtendedValue {
-          // Allocate storage for a pointer/allocatble descriptor.
-          // No shape/lengths to be passed to the alloca.
-          return fir::MutableBoxValue(allocate({}, {}), {}, {});
-        },
-        [&](const auto &) -> fir::ExtendedValue {
-          mlir::Value temp =
-              allocate(fir::factory::getExtents(loc, *builder, hexv),
-                       fir::factory::getTypeParams(loc, *builder, hexv));
-          return fir::substBase(hexv, temp);
-        });
-
-    // Initialise cloned allocatable
-    hexv.match(
-        [&](const fir::MutableBoxValue &box) -> void {
-          // Do not process pointers
-          if (Fortran::semantics::IsPointer(sym.GetUltimate())) {
-            return;
-          }
-          // Allocate storage for a pointer/allocatble descriptor.
-          // No shape/lengths to be passed to the alloca.
-          const auto new_box = exv.getBoxOf<fir::MutableBoxValue>();
+    assert(sym.detailsIf<Fortran::semantics::HostAssocDetails>() &&
+           "No host-association found");
+    fir::ExtendedValue exv = cloneSymbolValue(sym);
+    fir::ExtendedValue oexv = symBoxToExtendedValue(getOriginalSymbolBox(sym));
+    initClonedValue(sym, exv, oexv);
+    return bindIfNewSymbol(sym, exv);
+  }
 
-          // allocate if allocated
-          mlir::Value isAllocated =
-              fir::factory::genIsAllocatedOrAssociatedTest(*builder, loc, box);
-          auto if_builder = builder->genIfThenElse(loc, isAllocated);
-          if_builder.genThen([&]() {
-            std::string name = mangleName(sym) + ".alloc";
-            if (auto seqTy = symType.dyn_cast<fir::SequenceType>()) {
-              fir::ExtendedValue read = fir::factory::genMutableBoxRead(
-                  *builder, loc, box, /*mayBePolymorphic=*/false);
-              if (auto read_arr_box = read.getBoxOf<fir::ArrayBoxValue>()) {
-                fir::factory::genInlinedAllocation(
-                    *builder, loc, *new_box, read_arr_box->getLBounds(),
-                    read_arr_box->getExtents(),
-                    /*lenParams=*/std::nullopt, name,
-                    /*mustBeHeap=*/true);
-              } else if (auto read_char_arr_box =
-                             read.getBoxOf<fir::CharArrayBoxValue>()) {
-                fir::factory::genInlinedAllocation(
-                    *builder, loc, *new_box, read_char_arr_box->getLBounds(),
-                    read_char_arr_box->getExtents(),
-                    read_char_arr_box->getLen(), name,
-                    /*mustBeHeap=*/true);
-              } else {
-                TODO(loc, "Unhandled allocatable box type");
-              }
-            } else {
-              fir::factory::genInlinedAllocation(
-                  *builder, loc, *new_box, box.getMutableProperties().lbounds,
-                  box.getMutableProperties().extents,
-                  box.nonDeferredLenParams(), name,
-                  /*mustBeHeap=*/true);
-            }
-          });
-          if_builder.genElse([&]() {
-            // nullify box
-            auto empty = fir::factory::createUnallocatedBox(
-                *builder, loc, new_box->getBoxTy(),
-                new_box->nonDeferredLenParams(), {});
-            builder->create<fir::StoreOp>(loc, empty, new_box->getAddr());
-          });
-          if_builder.end();
-        },
-        [&](const auto &) -> void {
-          // Do nothing
-        });
+  Fortran::lower::SymbolBox
+  createVarClone(const Fortran::semantics::Symbol &sym) override final {
+    fir::ExtendedValue exv = cloneSymbolValue(sym);
+    Fortran::lower::SymMap symMap;
+    addSymbol(sym, exv, /*forced=*/true, symMap);
+    return symMap.shallowLookupSymbol(sym);
+  }
 
-    return bindIfNewSymbol(sym, exv);
+  void initVarClone(const Fortran::semantics::Symbol &sym,
+                    const Fortran::lower::SymbolBox &clone) override final {
+    fir::ExtendedValue exv = symBoxToExtendedValue(clone);
+    fir::ExtendedValue oexv = symBoxToExtendedValue(getOriginalSymbolBox(sym));
+    initClonedValue(sym, exv, oexv);
   }
 
   void createHostAssociateVarCloneDealloc(
       const Fortran::semantics::Symbol &sym) override final {
-    mlir::Location loc = genLocation(sym.name());
     Fortran::lower::SymbolBox hsb = lookupSymbol(sym);
+    createVarCloneDealloc(sym, hsb);
+  }
+
+  void createVarCloneDealloc(const Fortran::semantics::Symbol &sym,
+                             Fortran::lower::SymbolBox &sb) override final {
+    mlir::Location loc = genLocation(sym.name());
 
-    fir::ExtendedValue hexv = symBoxToExtendedValue(hsb);
-    hexv.match(
+    fir::ExtendedValue exv = symBoxToExtendedValue(sb);
+    exv.match(
         [&](const fir::MutableBoxValue &new_box) -> void {
           // Do not process pointers
           if (Fortran::semantics::IsPointer(sym.GetUltimate())) {
@@ -740,6 +657,20 @@ class FirConverter : public Fortran::lower::AbstractConverter {
         });
   }
 
+  void copyVar(const Fortran::semantics::Symbol &dst,
+               const Fortran::lower::SymbolBox &src,
+               bool needBarrier = false) override final {
+    Fortran::lower::SymbolBox dst_sb = lookupSymbol(dst);
+    copyVar(dst, dst_sb, src, needBarrier);
+  }
+
+  void copyVar(const Fortran::lower::SymbolBox &dst,
+               const Fortran::semantics::Symbol &src,
+               bool needBarrier = false) override final {
+    Fortran::lower::SymbolBox src_sb = lookupSymbol(src);
+    copyVar(src, dst, src_sb, needBarrier);
+  }
+
   void copyHostAssociateVar(
       const Fortran::semantics::Symbol &sym,
       mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr) override final {
@@ -774,64 +705,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       rhs_sb = &hsb;
     }
 
-    mlir::Location loc = genLocation(sym.name());
-
-    if (lowerToHighLevelFIR()) {
-      hlfir::Entity lhs{lhs_sb->getAddr()};
-      hlfir::Entity rhs{rhs_sb->getAddr()};
-      // Temporary_lhs is set to true in hlfir.assign below to avoid user
-      // assignment to be used and finalization to be called on the LHS.
-      // This may or may not be correct but mimics the current behaviour
-      // without HLFIR.
-      auto copyData = [&](hlfir::Entity l, hlfir::Entity r) {
-        // Dereference RHS and load it if trivial scalar.
-        r = hlfir::loadTrivialScalar(loc, *builder, r);
-        builder->create<hlfir::AssignOp>(
-            loc, r, l,
-            /*isWholeAllocatableAssignment=*/false,
-            /*keepLhsLengthInAllocatableAssignment=*/false,
-            /*temporary_lhs=*/true);
-      };
-      if (lhs.isAllocatable()) {
-        // Deep copy allocatable if it is allocated.
-        // Note that when allocated, the RHS is already allocated with the LHS
-        // shape for copy on entry in createHostAssociateVarClone.
-        // For lastprivate, this assumes that the RHS was not reallocated in
-        // the OpenMP region.
-        lhs = hlfir::derefPointersAndAllocatables(loc, *builder, lhs);
-        mlir::Value addr = hlfir::genVariableRawAddress(loc, *builder, lhs);
-        mlir::Value isAllocated = builder->genIsNotNullAddr(loc, addr);
-        builder->genIfThen(loc, isAllocated)
-            .genThen([&]() {
-              // Copy the DATA, not the descriptors.
-              copyData(lhs, rhs);
-            })
-            .end();
-      } else if (lhs.isPointer()) {
-        // Set LHS target to the target of RHS (do not copy the RHS
-        // target data into the LHS target storage).
-        auto loadVal = builder->create<fir::LoadOp>(loc, rhs);
-        builder->create<fir::StoreOp>(loc, loadVal, lhs);
-      } else {
-        // Non ALLOCATABLE/POINTER variable. Simple DATA copy.
-        copyData(lhs, rhs);
-      }
-    } else {
-      fir::ExtendedValue lhs = symBoxToExtendedValue(*lhs_sb);
-      fir::ExtendedValue rhs = symBoxToExtendedValue(*rhs_sb);
-      mlir::Type symType = genType(sym);
-      if (auto seqTy = symType.dyn_cast<fir::SequenceType>()) {
-        Fortran::lower::StatementContext stmtCtx;
-        Fortran::lower::createSomeArrayAssignment(*this, lhs, rhs, localSymbols,
-                                                  stmtCtx);
-        stmtCtx.finalizeAndReset();
-      } else if (lhs.getBoxOf<fir::CharBoxValue>()) {
-        fir::factory::CharacterExprHelper{*builder, loc}.createAssign(lhs, rhs);
-      } else {
-        auto loadVal = builder->create<fir::LoadOp>(loc, fir::getBase(rhs));
-        builder->create<fir::StoreOp>(loc, loadVal, fir::getBase(lhs));
-      }
-    }
+    copyVar(sym, *lhs_sb, *rhs_sb);
 
     if (copyAssignIP && copyAssignIP->isSet() &&
         sym.test(Fortran::semantics::Symbol::Flag::OmpLastPrivate)) {
@@ -1079,16 +953,226 @@ class FirConverter : public Fortran::lower::AbstractConverter {
                  fir::ExtendedValue val, bool forced = false) {
     if (!forced && lookupSymbol(sym))
       return false;
+    return addSymbol(sym, val, forced, localSymbols);
+  }
+
+  /// Add the symbol to \p symMap.
+  /// Always returns `true`.
+  bool addSymbol(const Fortran::semantics::SymbolRef sym,
+                 fir::ExtendedValue val, bool forced,
+                 Fortran::lower::SymMap &symMap) {
     if (lowerToHighLevelFIR()) {
-      Fortran::lower::genDeclareSymbol(*this, localSymbols, sym, val,
-                                       fir::FortranVariableFlagsEnum::None,
-                                       forced);
+      Fortran::lower::genDeclareSymbol(
+          *this, symMap, sym, val, fir::FortranVariableFlagsEnum::None, forced);
     } else {
-      localSymbols.addSymbol(sym, val, forced);
+      symMap.addSymbol(sym, val, forced);
     }
     return true;
   }
 
+  void initClonedValue(const Fortran::semantics::Symbol &sym,
+                       const fir::ExtendedValue &clone,
+                       const fir::ExtendedValue &orig) {
+    mlir::Location loc = genLocation(sym.name());
+    mlir::Type symType = genType(sym);
+    // The type of a non host associated symbol may be wrapped inside a box.
+    if (!sym.detailsIf<Fortran::semantics::HostAssocDetails>()) {
+      if (mlir::Type seqType = fir::unwrapUntilSeqType(symType))
+        symType = seqType;
+    }
+
+    // Initialise cloned allocatable
+    orig.match(
+        [&](const fir::MutableBoxValue &box) -> void {
+          // Do not process pointers
+          if (Fortran::semantics::IsPointer(sym.GetUltimate())) {
+            return;
+          }
+          // Allocate storage for a pointer/allocatble descriptor.
+          // No shape/lengths to be passed to the alloca.
+          const auto new_box = clone.getBoxOf<fir::MutableBoxValue>();
+
+          // allocate if allocated
+          mlir::Value isAllocated =
+              fir::factory::genIsAllocatedOrAssociatedTest(*builder, loc, box);
+          auto if_builder = builder->genIfThenElse(loc, isAllocated);
+          if_builder.genThen([&]() {
+            std::string name = mangleName(sym) + ".alloc";
+            if (auto seqTy = symType.dyn_cast<fir::SequenceType>()) {
+              fir::ExtendedValue read = fir::factory::genMutableBoxRead(
+                  *builder, loc, box, /*mayBePolymorphic=*/false);
+              if (auto read_arr_box = read.getBoxOf<fir::ArrayBoxValue>()) {
+                fir::factory::genInlinedAllocation(
+                    *builder, loc, *new_box, read_arr_box->getLBounds(),
+                    read_arr_box->getExtents(),
+                    /*lenParams=*/std::nullopt, name,
+                    /*mustBeHeap=*/true);
+              } else if (auto read_char_arr_box =
+                             read.getBoxOf<fir::CharArrayBoxValue>()) {
+                fir::factory::genInlinedAllocation(
+                    *builder, loc, *new_box, read_char_arr_box->getLBounds(),
+                    read_char_arr_box->getExtents(),
+                    read_char_arr_box->getLen(), name,
+                    /*mustBeHeap=*/true);
+              } else {
+                TODO(loc, "Unhandled allocatable box type");
+              }
+            } else {
+              fir::factory::genInlinedAllocation(
+                  *builder, loc, *new_box, box.getMutableProperties().lbounds,
+                  box.getMutableProperties().extents,
+                  box.nonDeferredLenParams(), name,
+                  /*mustBeHeap=*/true);
+            }
+          });
+          if_builder.genElse([&]() {
+            // nullify box
+            auto empty = fir::factory::createUnallocatedBox(
+                *builder, loc, new_box->getBoxTy(),
+                new_box->nonDeferredLenParams(), {});
+            builder->create<fir::StoreOp>(loc, empty, new_box->getAddr());
+          });
+          if_builder.end();
+        },
+        [&](const auto &) -> void {
+          // Do nothing
+        });
+  }
+
+  Fortran::lower::SymbolBox
+  getOriginalSymbolBox(const Fortran::semantics::Symbol &sym) {
+    const auto *details = sym.detailsIf<Fortran::semantics::HostAssocDetails>();
+    if (details) {
+      const Fortran::semantics::Symbol &hsym = details->symbol();
+      return lookupSymbol(hsym);
+    }
+    return lookupSymbol(sym);
+  }
+
+  fir::ExtendedValue cloneSymbolValue(const Fortran::semantics::Symbol &sym) {
+    mlir::Location loc = genLocation(sym.name());
+    mlir::Type symType = genType(sym);
+    mlir::Type allocType = symType;
+    const auto *details = sym.detailsIf<Fortran::semantics::HostAssocDetails>();
+    if (details) {
+      const Fortran::semantics::Symbol &hsym = details->symbol();
+      if (Fortran::semantics::IsAllocatableOrObjectPointer(&hsym.GetUltimate()))
+        allocType = genType(hsym);
+    }
+    Fortran::lower::SymbolBox sb = getOriginalSymbolBox(sym);
+
+    auto allocate = [&](llvm::ArrayRef<mlir::Value> shape,
+                        llvm::ArrayRef<mlir::Value> typeParams) -> mlir::Value {
+      mlir::Value allocVal = builder->allocateLocal(
+          loc, allocType, mangleName(sym),
+          toStringRef(sym.GetUltimate().name()),
+          /*pinned=*/true, shape, typeParams,
+          sym.GetUltimate().attrs().test(Fortran::semantics::Attr::TARGET));
+      return allocVal;
+    };
+
+    fir::ExtendedValue oexv = symBoxToExtendedValue(sb);
+    fir::ExtendedValue exv = oexv.match(
+        [&](const fir::BoxValue &box) -> fir::ExtendedValue {
+          const Fortran::semantics::DeclTypeSpec *type = sym.GetType();
+          if (type && type->IsPolymorphic())
+            TODO(loc, "create polymorphic copy");
+          // Create a contiguous temp with the same shape and length as
+          // the original variable described by a fir.box.
+          llvm::SmallVector<mlir::Value> extents =
+              fir::factory::getExtents(loc, *builder, oexv);
+          if (box.isDerivedWithLenParameters())
+            TODO(loc, "get length parameters from derived type BoxValue");
+          if (box.isCharacter()) {
+            mlir::Value len = fir::factory::readCharLen(*builder, loc, box);
+            mlir::Value temp = allocate(extents, {len});
+            return fir::CharArrayBoxValue{temp, len, extents};
+          }
+          return fir::ArrayBoxValue{allocate(extents, {}), extents};
+        },
+        [&](const fir::MutableBoxValue &box) -> fir::ExtendedValue {
+          // Allocate storage for a pointer/allocatble descriptor.
+          // No shape/lengths to be passed to the alloca.
+          return fir::MutableBoxValue(allocate({}, {}), {}, {});
+        },
+        [&](const auto &) -> fir::ExtendedValue {
+          mlir::Value temp =
+              allocate(fir::factory::getExtents(loc, *builder, oexv),
+                       fir::factory::getTypeParams(loc, *builder, oexv));
+          return fir::substBase(oexv, temp);
+        });
+
+    return exv;
+  }
+
+  void copyVar(const Fortran::semantics::Symbol &sym,
+               const Fortran::lower::SymbolBox &lhs_sb,
+               const Fortran::lower::SymbolBox &rhs_sb,
+               bool needBarrier = false) {
+    mlir::Location loc = genLocation(sym.name());
+
+    if (lowerToHighLevelFIR()) {
+      hlfir::Entity lhs{lhs_sb.getAddr()};
+      hlfir::Entity rhs{rhs_sb.getAddr()};
+      // Temporary_lhs is set to true in hlfir.assign below to avoid user
+      // assignment to be used and finalization to be called on the LHS.
+      // This may or may not be correct but mimics the current behaviour
+      // without HLFIR.
+      auto copyData = [&](hlfir::Entity l, hlfir::Entity r) {
+        // Dereference RHS and load it if trivial scalar.
+        hlfir::Entity r2 = hlfir::loadTrivialScalar(loc, *builder, r);
+        if (needBarrier && r2 != r)
+          builder->create<mlir::omp::BarrierOp>(loc);
+        builder->create<hlfir::AssignOp>(
+            loc, r2, l,
+            /*isWholeAllocatableAssignment=*/false,
+            /*keepLhsLengthInAllocatableAssignment=*/false,
+            /*temporary_lhs=*/true);
+        if (needBarrier)
+          builder->create<mlir::omp::BarrierOp>(loc);
+      };
+      if (lhs.isAllocatable()) {
+        // Deep copy allocatable if it is allocated.
+        // Note that when allocated, the RHS is already allocated with the LHS
+        // shape for copy on entry in createHostAssociateVarClone.
+        // For lastprivate, this assumes that the RHS was not reallocated in
+        // the OpenMP region.
+        lhs = hlfir::derefPointersAndAllocatables(loc, *builder, lhs);
+        mlir::Value addr = hlfir::genVariableRawAddress(loc, *builder, lhs);
+        mlir::Value isAllocated = builder->genIsNotNullAddr(loc, addr);
+        builder->genIfThen(loc, isAllocated)
+            .genThen([&]() {
+              // Copy the DATA, not the descriptors.
+              copyData(lhs, rhs);
+            })
+            .end();
+      } else if (lhs.isPointer()) {
+        // Set LHS target to the target of RHS (do not copy the RHS
+        // target data into the LHS target storage).
+        auto loadVal = builder->create<fir::LoadOp>(loc, rhs);
+        builder->create<fir::StoreOp>(loc, loadVal, lhs);
+      } else {
+        // Non ALLOCATABLE/POINTER variable. Simple DATA copy.
+        copyData(lhs, rhs);
+      }
+    } else {
+      fir::ExtendedValue lhs = symBoxToExtendedValue(lhs_sb);
+      fir::ExtendedValue rhs = symBoxToExtendedValue(rhs_sb);
+      mlir::Type symType = genType(sym);
+      if (auto seqTy = symType.dyn_cast<fir::SequenceType>()) {
+        Fortran::lower::StatementContext stmtCtx;
+        Fortran::lower::createSomeArrayAssignment(*this, lhs, rhs, localSymbols,
+                                                  stmtCtx);
+        stmtCtx.finalizeAndReset();
+      } else if (lhs.getBoxOf<fir::CharBoxValue>()) {
+        fir::factory::CharacterExprHelper{*builder, loc}.createAssign(lhs, rhs);
+      } else {
+        auto loadVal = builder->create<fir::LoadOp>(loc, fir::getBase(rhs));
+        builder->create<fir::StoreOp>(loc, loadVal, fir::getBase(lhs));
+      }
+    }
+  }
+
   /// Map a block argument to a result or dummy symbol. This is not the
   /// definitive mapping. The specification expression have not been lowered
   /// yet. The final mapping will be done using this pre-mapping in
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 764d2175c0a962..c18870177f260b 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -18,6 +18,7 @@
 #include "flang/Lower/ConvertVariable.h"
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Lower/StatementContext.h"
+#include "flang/Lower/SymbolMap.h"
 #include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/Todo.h"
@@ -562,6 +563,7 @@ class ClauseProcessor {
   processAllocate(llvm::SmallVectorImpl<mlir::Value> &allocatorOperands,
                   llvm::SmallVectorImpl<mlir::Value> &allocateOperands) const;
   bool processCopyin() const;
+  bool processCopyPrivate(mlir::Operation *singleOp) const;
   bool processDepend(llvm::SmallVectorImpl<mlir::Attribute> &dependTypeOperands,
                      llvm::SmallVectorImpl<mlir::Value> &dependOperands) const;
   bool
@@ -1634,6 +1636,67 @@ bool ClauseProcessor::processCopyin() const {
   return hasCopyin;
 }
 
+bool ClauseProcessor::processCopyPrivate(mlir::Operation *singleOp) const {
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  assert(singleOp->getNumRegions() == 1 && !singleOp->getRegion(0).empty());
+
+  auto copyPrivateVar = [&](Fortran::semantics::Symbol *sym) {
+    // If we are inside a parallel operation, then create a clone before it,
+    // else insert the clone right before the single operation.
+    mlir::Operation *insOp = singleOp->getParentOp();
+    while (insOp) {
+      if (mlir::dyn_cast<mlir::omp::ParallelOp>(insOp))
+        break;
+      insOp = insOp->getParentOp();
+    }
+    if (!insOp)
+      insOp = singleOp;
+    firOpBuilder.setInsertionPoint(insOp);
+    Fortran::lower::SymbolBox clone = converter.createVarClone(*sym);
+    // Initialize clone, at the end of the single region.
+    mlir::Operation *lastOp = singleOp->getRegion(0).back().getTerminator();
+    firOpBuilder.setInsertionPoint(lastOp);
+    converter.initVarClone(*sym, clone);
+    converter.copyVar(clone, *sym);
+    // Reload the private variable from the clone, after the single operation.
+    // For ALLOCATABLE variables, the copy will happen inside an if-allocated
+    // block and this is where the barrier will be inserted. This can cause
+    // a hang if some threads have allocated the variable and others not.
+    // Luckly, OpenMP spec [OMP 5.2, 5.7.2] states that any list item with the
+    // ALLOCATABLE attribute must have the allocation status of allocated when
+    // the intrinsic assignment is performed.
+    firOpBuilder.setInsertionPointAfter(singleOp);
+    converter.copyVar(*sym, clone, /*needBarrier=*/true);
+    converter.createVarCloneDealloc(*sym, clone);
+  };
+
+  bool hasCopyPrivate = findRepeatableClause<ClauseTy::Copyprivate>(
+      [&](const ClauseTy::Copyprivate *copyPrivateClause,
+          const Fortran::parser::CharBlock &) {
+        const Fortran::parser::OmpObjectList &ompObjectList =
+            copyPrivateClause->v;
+        for (const Fortran::parser::OmpObject &ompObject : ompObjectList.v) {
+          Fortran::semantics::Symbol *sym = getOmpObjectSymbol(ompObject);
+          if (const auto *commonDetails =
+                  sym->detailsIf<Fortran::semantics::CommonBlockDetails>()) {
+            for (const auto &mem : commonDetails->objects())
+              copyPrivateVar(&*mem);
+            break;
+          }
+          copyPrivateVar(sym);
+        }
+      });
+
+  if (hasCopyPrivate) {
+    // Set insertion point to start of region.
+    // At least a terminator should be present.
+    auto &ops = singleOp->getRegion(0).front().getOperations();
+    assert(!ops.empty() && "Unexpected empty region");
+    firOpBuilder.setInsertionPoint(&ops.front());
+  }
+  return hasCopyPrivate;
+}
+
 bool ClauseProcessor::processDepend(
     llvm::SmallVectorImpl<mlir::Attribute> &dependTypeOperands,
     llvm::SmallVectorImpl<mlir::Value> &dependOperands) const {
@@ -2315,14 +2378,15 @@ genSingleOp(Fortran::lower::AbstractConverter &converter,
 
   ClauseProcessor cp(converter, beginClauseList);
   cp.processAllocate(allocatorOperands, allocateOperands);
-  cp.processTODO<Fortran::parser::OmpClause::Copyprivate>(
-      currentLocation, llvm::omp::Directive::OMPD_single);
 
-  ClauseProcessor(converter, endClauseList).processNowait(nowaitAttr);
+  ClauseProcessor ecp(converter, endClauseList);
+  ecp.processNowait(nowaitAttr);
 
-  return genOpWithBody<mlir::omp::SingleOp>(
+  auto singleOp = genOpWithBody<mlir::omp::SingleOp>(
       converter, eval, currentLocation, /*outerCombined=*/false,
       &beginClauseList, allocateOperands, allocatorOperands, nowaitAttr);
+  ecp.processCopyPrivate(singleOp);
+  return singleOp;
 }
 
 static mlir::omp::TaskOp
@@ -3105,7 +3169,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
 
   for (const auto &clause : endClauseList.v) {
     mlir::Location clauseLocation = converter.genLocation(clause.source);
-    if (!std::get_if<Fortran::parser::OmpClause::Nowait>(&clause.u))
+    if (!std::get_if<Fortran::parser::OmpClause::Nowait>(&clause.u) &&
+        !std::get_if<Fortran::parser::OmpClause::Copyprivate>(&clause.u))
       TODO(clauseLocation, "OpenMP Block construct clause");
   }
 
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index da6c865ad56a3b..e59cdd7b439b4a 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -2382,7 +2382,8 @@ void OmpAttributeVisitor::CheckDataCopyingClause(
       // either 'private' or 'threadprivate' in enclosing context.
       if (!checkSymbol->test(Symbol::Flag::OmpThreadprivate) &&
           !(HasSymbolInEnclosingScope(symbol, currScope()) &&
-              symbol.test(Symbol::Flag::OmpPrivate))) {
+              (symbol.test(Symbol::Flag::OmpPrivate) ||
+                  symbol.test(Symbol::Flag::OmpFirstPrivate)))) {
         context_.Say(name.source,
             "COPYPRIVATE variable '%s' is not PRIVATE or THREADPRIVATE in "
             "outer context"_err_en_US,
diff --git a/flang/test/Lower/OpenMP/Todo/copyprivate.f90 b/flang/test/Lower/OpenMP/Todo/copyprivate.f90
deleted file mode 100644
index 0d871427ce60ff..00000000000000
--- a/flang/test/Lower/OpenMP/Todo/copyprivate.f90
+++ /dev/null
@@ -1,13 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: OpenMP Block construct clause
-subroutine sb
-  integer, save :: a
-  !$omp threadprivate(a)
-  !$omp parallel
-  !$omp single
-  a = 3
-  !$omp end single copyprivate(a)
-  !$omp end parallel
-end subroutine
diff --git a/flang/test/Lower/OpenMP/copyprivate.f90 b/flang/test/Lower/OpenMP/copyprivate.f90
new file mode 100644
index 00000000000000..bac71a79424645
--- /dev/null
+++ b/flang/test/Lower/OpenMP/copyprivate.f90
@@ -0,0 +1,244 @@
+! Test COPYPRIVATE.
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+!CHECK-LABEL: func @_QPtest_tp
+!CHECK:       %[[SYNC_VAR_ADDR:.*]] = fir.alloca f32 {bindc_name = "a", pinned, uniq_name = "_QFtest_tpEa"}
+!CHECK:       %[[SYNC_VAR:.*]]:2 = hlfir.declare %[[SYNC_VAR_ADDR]] {uniq_name = "_QFtest_tpEa"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:       omp.single {
+!CHECK:         hlfir.assign %{{.*}} to %[[SYNC_VAR]]#0 temporary_lhs : f32, !fir.ref<f32>
+!CHECK-NEXT:    omp.terminator
+!CHECK-NEXT:  }
+!CHECK-NEXT:  %[[TMP:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<f32>
+!CHECK-NEXT:  omp.barrier
+!CHECK-NEXT:  hlfir.assign %[[TMP]] to %{{.*}}#1 temporary_lhs : f32, !fir.ref<f32>
+!CHECK-NEXT:  omp.barrier
+subroutine test_tp()
+  real(4), save :: a = 2.5
+  !$omp threadprivate(a)
+
+  !$omp single
+  a = 1.5
+  !$omp end single copyprivate(a)
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_priv
+!CHECK:       %[[ORIG_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_privEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:       %[[SYNC_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_privEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:       omp.parallel {
+!CHECK:         omp.single {
+!CHECK:           hlfir.assign %{{.*}} to %[[SYNC_VAR]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT:      omp.terminator
+!CHECK-NEXT:    }
+!CHECK-NEXT:    %[[TMP:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<i32>
+!CHECK-NEXT:    omp.barrier
+!CHECK-NEXT:    hlfir.assign %[[TMP]] to %{{.*}}#1 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT:    omp.barrier
+!CHECK:       }
+subroutine test_priv()
+  integer :: i
+
+  i = 11
+  !$omp parallel firstprivate(i)
+  !$omp single
+  i = i + 1
+  !$omp end single copyprivate(i)
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_array
+!CHECK:       %[[ORIG_VAR:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+!CHECK:       %[[SYNC_VAR:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+!CHECK:       omp.parallel {
+!CHECK:         omp.single {
+!CHECK:           hlfir.assign %{{.*}}#1 to %[[SYNC_VAR]]#0 temporary_lhs : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
+!CHECK-NEXT:      omp.terminator
+!CHECK-NEXT:    }
+!CHECK-NEXT:    hlfir.assign %[[SYNC_VAR]]#0 to %{{.*}}#1 temporary_lhs : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
+!CHECK-NEXT:    omp.barrier
+!CHECK:       }
+subroutine test_array()
+  integer :: a(10), i
+
+  a = -1
+  !$omp parallel firstprivate(a)
+  !$omp single
+  do i = 1, 5
+    a(i) = i * 10
+  end do
+  !$omp end single copyprivate(a)
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_type
+!CHECK:       %[[ORIG_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_typeEt"} : (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>) -> (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>)
+!CHECK:       %[[SYNC_VAR:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_typeEt"} : (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>) -> (!fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>)
+!CHECK:       omp.parallel {
+!CHECK:         omp.single {
+!CHECK:           hlfir.assign %{{.*}}#1 to %[[SYNC_VAR]]#0 temporary_lhs : !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>
+!CHECK-NEXT:      omp.terminator
+!CHECK-NEXT:    }
+!CHECK-NEXT:    hlfir.assign %[[SYNC_VAR]]#0 to %{{.*}}#1 temporary_lhs : !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>, !fir.ref<!fir.type<_QFtest_typeTty{i:i32,r:f32,a:!fir.array<10xi32>}>>
+!CHECK-NEXT:    omp.barrier
+!CHECK:       }
+subroutine test_type()
+  type ty
+    integer :: i
+    real :: r
+    integer, dimension(10) :: a
+  end type
+
+  integer :: i
+  type(ty) :: t
+
+  t%i = -1
+  t%r = -1.5
+  t%a = -1
+  !$omp parallel firstprivate(t)
+  !$omp single
+  t%i = 42
+  t%r = 3.14
+  do i = 1, 5
+    t%a(i) = i * 10
+  end do
+  !$omp end single copyprivate(t)
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_multi
+!CHECK:       %[[I_SYNC_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFtest_multiEi"}
+!CHECK:       %[[I_SYNC:.*]]:2 = hlfir.declare %[[I_SYNC_ADDR]] {uniq_name = "_QFtest_multiEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:       %[[J_SYNC_ADDR:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFtest_multiEj"}
+!CHECK:       %[[J_SYNC:.*]]:2 = hlfir.declare %[[J_SYNC_ADDR]] {uniq_name = "_QFtest_multiEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:       %[[K_SYNC_ADDR:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFtest_multiEk"}
+!CHECK:       %[[K_SYNC:.*]]:2 = hlfir.declare %[[K_SYNC_ADDR]] {uniq_name = "_QFtest_multiEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:       omp.parallel {
+!CHECK:         omp.single {
+!CHECK:           %[[I:.*]] = fir.load %{{.*}}#1 : !fir.ref<i32>
+!CHECK:           hlfir.assign %[[I]] to %[[I_SYNC]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK:           %[[J:.*]] = fir.load %{{.*}}#1 : !fir.ref<i32>
+!CHECK:           hlfir.assign %[[J]] to %[[J_SYNC]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK:           %[[K:.*]] = fir.load %{{.*}}#1 : !fir.ref<i32>
+!CHECK:           hlfir.assign %[[K]] to %[[K_SYNC]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT:      omp.terminator
+!CHECK-NEXT:    }
+!CHECK-NEXT:    %[[K:.*]] = fir.load %[[K_SYNC]]#0 : !fir.ref<i32>
+!CHECK-NEXT:    omp.barrier
+!CHECK-NEXT:    hlfir.assign %[[K]] to %{{.*}}#1 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT:    omp.barrier
+!CHECK-NEXT:    %[[J:.*]] = fir.load %[[J_SYNC]]#0 : !fir.ref<i32>
+!CHECK-NEXT:    omp.barrier
+!CHECK-NEXT:    hlfir.assign %[[J]] to %{{.*}}#1 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT:    omp.barrier
+!CHECK-NEXT:    %[[I:.*]] = fir.load %[[I_SYNC]]#0 : !fir.ref<i32>
+!CHECK-NEXT:    omp.barrier
+!CHECK-NEXT:    hlfir.assign %[[I]] to %{{.*}}#1 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT:    omp.barrier
+!CHECK:       }
+subroutine test_multi()
+  integer, save :: i, j, k
+  !$omp threadprivate(i, j, k)
+
+  i = 11
+  j = 12
+  k = 13
+  !$omp parallel
+  !$omp single
+  i = 21
+  j = 22
+  k = 23
+  !$omp end single copyprivate(i, j, k)
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_alloc
+!CHECK:       %[[SYNC_ADDR:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "a", pinned, uniq_name = "_QFtest_allocEa"}
+!CHECK:       %[[SYNC_VAR:.*]]:2 = hlfir.declare %[[SYNC_ADDR]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_allocEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+!CHECK-NEXT:  omp.parallel {
+!CHECK:         omp.single {
+!CHECK:           fir.if %{{.*}} {
+!CHECK:             %[[TMP0:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {fir.must_be_heap = true, uniq_name = "_QFtest_allocEa.alloc"}
+!CHECK:             %[[TMP1:.*]] = fir.embox %[[TMP0]](%{{.*}}) : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK:             fir.store %[[TMP1]] to %[[SYNC_VAR]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK:           } else {
+!CHECK:             %[[TMP2:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+!CHECK:             %[[TMP3:.*]] = fir.embox %[[TMP2]](%{{.*}}) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK:             fir.store %[[TMP3]] to %[[SYNC_VAR]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK:           }
+!CHECK:           %[[TMP4:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK:           fir.if %{{.*}} {
+!CHECK:             hlfir.assign %{{.*}} to %[[TMP4]] temporary_lhs : !fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK:           }
+!CHECK-NEXT:      omp.terminator
+!CHECK-NEXT:    }
+!CHECK:         fir.if %{{.*}} {
+!CHECK:           %[[TMP5:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK-NEXT:      omp.barrier
+!CHECK-NEXT:      hlfir.assign %[[TMP5]] to %{{.*}} temporary_lhs : !fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK-NEXT:      omp.barrier
+!CHECK:         }
+!CHECK:         fir.if %{{.*}} {
+!CHECK:           %[[TMP6:.*]] = fir.load %[[SYNC_VAR]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK:           %[[TMP7:.*]] = fir.box_addr %[[TMP6]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+!CHECK:           fir.freemem %[[TMP7]] : !fir.heap<!fir.array<?xi32>>
+!CHECK:         }
+subroutine test_alloc()
+  integer, allocatable :: a(:)
+  integer :: i
+
+  allocate(a(10))
+  a = -1
+  !$omp parallel firstprivate(a)
+  !$omp single
+  do i = 1, 5
+    a(i) = i * 10
+  end do
+  !$omp end single copyprivate(a)
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_alloc_tp
+!CHECK:       %[[SYNC_ADDR:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "a", pinned, uniq_name = "_QFtest_alloc_tpEa"}
+!CHECK:       %[[SYNC_VAR:.*]]:2 = hlfir.declare %[[SYNC_ADDR]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_alloc_tpEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+!CHECK-NEXT:  omp.parallel {
+!CHECK:         omp.single {
+!CHECK:           fir.if %{{.*}} {
+!CHECK:             %[[TMP0:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {fir.must_be_heap = true, uniq_name = "_QFtest_alloc_tpEa.alloc"}
+!CHECK:             %[[TMP1:.*]] = fir.embox %[[TMP0]](%{{.*}}) : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK:             fir.store %[[TMP1]] to %[[SYNC_VAR]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK:           } else {
+!CHECK:             %[[TMP2:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+!CHECK:             %[[TMP3:.*]] = fir.embox %[[TMP2]](%{{.*}}) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK:             fir.store %[[TMP3]] to %[[SYNC_VAR]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK:           }
+!CHECK:           %[[TMP4:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK:           fir.if %{{.*}} {
+!CHECK:             hlfir.assign %{{.*}} to %[[TMP4]] temporary_lhs : !fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK:           }
+!CHECK-NEXT:      omp.terminator
+!CHECK-NEXT:    }
+!CHECK:         fir.if %{{.*}} {
+!CHECK:           %[[TMP5:.*]] = fir.load %[[SYNC_VAR]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK-NEXT:      omp.barrier
+!CHECK-NEXT:      hlfir.assign %[[TMP5]] to %{{.*}} temporary_lhs : !fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+!CHECK-NEXT:      omp.barrier
+!CHECK:         }
+!CHECK:         fir.if %{{.*}} {
+!CHECK:           %[[TMP6:.*]] = fir.load %[[SYNC_VAR]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!CHECK:           %[[TMP7:.*]] = fir.box_addr %[[TMP6]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+!CHECK:           fir.freemem %[[TMP7]] : !fir.heap<!fir.array<?xi32>>
+!CHECK:         }
+subroutine test_alloc_tp()
+  integer, save, allocatable :: a(:)
+  !$omp threadprivate(a)
+  integer :: i
+
+  !$omp parallel
+  allocate(a(10))
+  a = -1
+  !$omp single
+  do i = 1, 5
+    a(i) = i * 10
+  end do
+  !$omp end single copyprivate(a)
+  !$omp end parallel
+end subroutine



More information about the flang-commits mailing list