[flang-commits] [flang] [llvm] [mlir] [flang][mlir][OpenMP] Add support for COPYPRIVATE (PR #73128)

Leandro Lupori via flang-commits flang-commits at lists.llvm.org
Mon Jan 22 05:48:05 PST 2024


https://github.com/luporl updated https://github.com/llvm/llvm-project/pull/73128

>From 8ce1986bde53e480dc720ad4cf78a7828d24faba Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Wed, 20 Dec 2023 11:05:47 -0300
Subject: [PATCH 1/8] [flang][mlir][OpenMP] Add support for COPYPRIVATE

Add initial handling of OpenMP COPYPRIVATE clause in Flang.

MLIR's omp.single operation was modified to support an optional
CopyPrivateVarList, which consists of (variable, copy function)
pairs. When present, each thread's copy of a listed variable is
updated with the value from the thread that executed the single
region, using the paired function to perform the copy.

When lowering COPYPRIVATE, Flang then generates the copy function
needed by each variable and builds the appropriate
CopyPrivateVarList. The translation to LLVM IR is done in
OMPIRBuilder, by calling createCopyPrivate() for each variable in
the list, which generates calls to __kmpc_copyprivate.

Fixes https://github.com/llvm/llvm-project/issues/63933
---
 flang/include/flang/Lower/AbstractConverter.h |   3 +
 flang/lib/Lower/Bridge.cpp                    | 137 ++++++-----
 flang/lib/Lower/OpenMP.cpp                    | 212 +++++++++++++++++-
 flang/lib/Semantics/resolve-directives.cpp    |   3 +-
 flang/test/Lower/OpenMP/Todo/copyprivate.f90  |  13 --
 flang/test/Lower/OpenMP/copyprivate.f90       |  48 ++++
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |   6 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  24 +-
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  10 +
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 105 ++++++++-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  20 +-
 11 files changed, 498 insertions(+), 83 deletions(-)
 delete mode 100644 flang/test/Lower/OpenMP/Todo/copyprivate.f90
 create mode 100644 flang/test/Lower/OpenMP/copyprivate.f90

diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index c19dcbdcdb3902..48804c327e1c70 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -120,6 +120,9 @@ class AbstractConverter {
       const Fortran::semantics::Symbol &sym,
       mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr) = 0;
 
+  virtual void copyVar(mlir::Location loc, mlir::Value dst,
+                       mlir::Value src) = 0;
+
   /// For a given symbol, check if it is present in the inner-most
   /// level of the symbol map.
   virtual bool isPresentShallowLookup(Fortran::semantics::Symbol &sym) = 0;
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 8006b9b426f4dc..04d09c48484910 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -743,6 +743,11 @@ class FirConverter : public Fortran::lower::AbstractConverter {
         });
   }
 
+  void copyVar(mlir::Location loc, mlir::Value dst,
+               mlir::Value src) override final {
+    copyVarHLFIR(loc, dst, src);
+  }
+
   void copyHostAssociateVar(
       const Fortran::semantics::Symbol &sym,
       mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr) override final {
@@ -777,64 +782,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       rhs_sb = &hsb;
     }
 
-    mlir::Location loc = genLocation(sym.name());
-
-    if (lowerToHighLevelFIR()) {
-      hlfir::Entity lhs{lhs_sb->getAddr()};
-      hlfir::Entity rhs{rhs_sb->getAddr()};
-      // Temporary_lhs is set to true in hlfir.assign below to avoid user
-      // assignment to be used and finalization to be called on the LHS.
-      // This may or may not be correct but mimics the current behaviour
-      // without HLFIR.
-      auto copyData = [&](hlfir::Entity l, hlfir::Entity r) {
-        // Dereference RHS and load it if trivial scalar.
-        r = hlfir::loadTrivialScalar(loc, *builder, r);
-        builder->create<hlfir::AssignOp>(
-            loc, r, l,
-            /*isWholeAllocatableAssignment=*/false,
-            /*keepLhsLengthInAllocatableAssignment=*/false,
-            /*temporary_lhs=*/true);
-      };
-      if (lhs.isAllocatable()) {
-        // Deep copy allocatable if it is allocated.
-        // Note that when allocated, the RHS is already allocated with the LHS
-        // shape for copy on entry in createHostAssociateVarClone.
-        // For lastprivate, this assumes that the RHS was not reallocated in
-        // the OpenMP region.
-        lhs = hlfir::derefPointersAndAllocatables(loc, *builder, lhs);
-        mlir::Value addr = hlfir::genVariableRawAddress(loc, *builder, lhs);
-        mlir::Value isAllocated = builder->genIsNotNullAddr(loc, addr);
-        builder->genIfThen(loc, isAllocated)
-            .genThen([&]() {
-              // Copy the DATA, not the descriptors.
-              copyData(lhs, rhs);
-            })
-            .end();
-      } else if (lhs.isPointer()) {
-        // Set LHS target to the target of RHS (do not copy the RHS
-        // target data into the LHS target storage).
-        auto loadVal = builder->create<fir::LoadOp>(loc, rhs);
-        builder->create<fir::StoreOp>(loc, loadVal, lhs);
-      } else {
-        // Non ALLOCATABLE/POINTER variable. Simple DATA copy.
-        copyData(lhs, rhs);
-      }
-    } else {
-      fir::ExtendedValue lhs = symBoxToExtendedValue(*lhs_sb);
-      fir::ExtendedValue rhs = symBoxToExtendedValue(*rhs_sb);
-      mlir::Type symType = genType(sym);
-      if (auto seqTy = symType.dyn_cast<fir::SequenceType>()) {
-        Fortran::lower::StatementContext stmtCtx;
-        Fortran::lower::createSomeArrayAssignment(*this, lhs, rhs, localSymbols,
-                                                  stmtCtx);
-        stmtCtx.finalizeAndReset();
-      } else if (lhs.getBoxOf<fir::CharBoxValue>()) {
-        fir::factory::CharacterExprHelper{*builder, loc}.createAssign(lhs, rhs);
-      } else {
-        auto loadVal = builder->create<fir::LoadOp>(loc, fir::getBase(rhs));
-        builder->create<fir::StoreOp>(loc, loadVal, fir::getBase(lhs));
-      }
-    }
+    copyVar(sym, *lhs_sb, *rhs_sb);
 
     if (copyAssignIP && copyAssignIP->isSet() &&
         sym.test(Fortran::semantics::Symbol::Flag::OmpLastPrivate)) {
@@ -1092,6 +1040,79 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     return true;
   }
 
+  void copyVar(const Fortran::semantics::Symbol &sym,
+               const Fortran::lower::SymbolBox &lhs_sb,
+               const Fortran::lower::SymbolBox &rhs_sb) {
+    mlir::Location loc = genLocation(sym.name());
+    if (lowerToHighLevelFIR())
+      copyVarHLFIR(loc, lhs_sb.getAddr(), rhs_sb.getAddr());
+    else
+      copyVarFIR(loc, sym, lhs_sb, rhs_sb);
+  }
+
+  void copyVarHLFIR(mlir::Location loc, mlir::Value dst, mlir::Value src) {
+    assert(lowerToHighLevelFIR());
+    hlfir::Entity lhs{dst};
+    hlfir::Entity rhs{src};
+    // Temporary_lhs is set to true in hlfir.assign below to avoid user
+    // assignment to be used and finalization to be called on the LHS.
+    // This may or may not be correct but mimics the current behaviour
+    // without HLFIR.
+    auto copyData = [&](hlfir::Entity l, hlfir::Entity r) {
+      // Dereference RHS and load it if trivial scalar.
+      r = hlfir::loadTrivialScalar(loc, *builder, r);
+      builder->create<hlfir::AssignOp>(
+          loc, r, l,
+          /*isWholeAllocatableAssignment=*/false,
+          /*keepLhsLengthInAllocatableAssignment=*/false,
+          /*temporary_lhs=*/true);
+    };
+    if (lhs.isAllocatable()) {
+      // Deep copy allocatable if it is allocated.
+      // Note that when allocated, the RHS is already allocated with the LHS
+      // shape for copy on entry in createHostAssociateVarClone.
+      // For lastprivate, this assumes that the RHS was not reallocated in
+      // the OpenMP region.
+      lhs = hlfir::derefPointersAndAllocatables(loc, *builder, lhs);
+      mlir::Value addr = hlfir::genVariableRawAddress(loc, *builder, lhs);
+      mlir::Value isAllocated = builder->genIsNotNullAddr(loc, addr);
+      builder->genIfThen(loc, isAllocated)
+          .genThen([&]() {
+            // Copy the DATA, not the descriptors.
+            copyData(lhs, rhs);
+          })
+          .end();
+    } else if (lhs.isPointer()) {
+      // Set LHS target to the target of RHS (do not copy the RHS
+      // target data into the LHS target storage).
+      auto loadVal = builder->create<fir::LoadOp>(loc, rhs);
+      builder->create<fir::StoreOp>(loc, loadVal, lhs);
+    } else {
+      // Non ALLOCATABLE/POINTER variable. Simple DATA copy.
+      copyData(lhs, rhs);
+    }
+  }
+
+  void copyVarFIR(mlir::Location loc, const Fortran::semantics::Symbol &sym,
+                  const Fortran::lower::SymbolBox &lhs_sb,
+                  const Fortran::lower::SymbolBox &rhs_sb) {
+    assert(!lowerToHighLevelFIR());
+    fir::ExtendedValue lhs = symBoxToExtendedValue(lhs_sb);
+    fir::ExtendedValue rhs = symBoxToExtendedValue(rhs_sb);
+    mlir::Type symType = genType(sym);
+    if (auto seqTy = symType.dyn_cast<fir::SequenceType>()) {
+      Fortran::lower::StatementContext stmtCtx;
+      Fortran::lower::createSomeArrayAssignment(*this, lhs, rhs, localSymbols,
+                                                stmtCtx);
+      stmtCtx.finalizeAndReset();
+    } else if (lhs.getBoxOf<fir::CharBoxValue>()) {
+      fir::factory::CharacterExprHelper{*builder, loc}.createAssign(lhs, rhs);
+    } else {
+      auto loadVal = builder->create<fir::LoadOp>(loc, fir::getBase(rhs));
+      builder->create<fir::StoreOp>(loc, loadVal, fir::getBase(lhs));
+    }
+  }
+
   /// Map a block argument to a result or dummy symbol. This is not the
   /// definitive mapping. The specification expression have not been lowered
   /// yet. The final mapping will be done using this pre-mapping in
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 7dd25f75d9eb76..12d68faed856ca 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -592,6 +592,10 @@ class ClauseProcessor {
   processAllocate(llvm::SmallVectorImpl<mlir::Value> &allocatorOperands,
                   llvm::SmallVectorImpl<mlir::Value> &allocateOperands) const;
   bool processCopyin() const;
+  bool processCopyPrivate(
+      mlir::Location currentLocation,
+      llvm::SmallVectorImpl<mlir::Value> &copyPrivateVars,
+      llvm::SmallVectorImpl<mlir::Attribute> &copyPrivateFuncs) const;
   bool processDepend(llvm::SmallVectorImpl<mlir::Attribute> &dependTypeOperands,
                      llvm::SmallVectorImpl<mlir::Value> &dependOperands) const;
   bool
@@ -1160,6 +1164,156 @@ class ReductionProcessor {
   }
 };
 
+/// Class that extracts information from the specified type.
+class TypeInfo {
+public:
+  TypeInfo(mlir::Location loc, mlir::Type ty) : loc(loc) {
+    name = typeScan(ty);
+  }
+
+  // Returns a textual representation of the type, with characters that are
+  // valid in identifiers.
+  const std::string &getName() const { return name; }
+
+  // Returns the length of character types.
+  std::optional<fir::CharacterType::LenType> getCharLength() const {
+    return charLen;
+  }
+
+  // Returns the shape of array types.
+  const llvm::SmallVector<int64_t> &getShape() const { return shape; }
+
+  // Is the type inside a box?
+  bool isBox() const { return inBox; }
+
+private:
+  // Scan type and return a unique name for it.
+  std::string typeScan(mlir::Type type);
+
+  mlir::Location loc;
+  std::string name;
+  std::optional<fir::CharacterType::LenType> charLen;
+  llvm::SmallVector<int64_t> shape;
+  bool inBox = false;
+};
+
+std::string TypeInfo::typeScan(mlir::Type ty) {
+  std::ostringstream ss;
+
+  auto unexpectedType = [&] {
+    std::string errmsg;
+    llvm::raw_string_ostream rss(errmsg);
+    rss << "Unexpected type: " << ty;
+    fir::emitFatalError(loc, errmsg);
+  };
+
+  if (auto aty = mlir::dyn_cast<fir::SequenceType>(ty)) {
+    // array -> A<rank>(_<extent>)+_<eleTy>
+    assert(shape.empty() && !aty.getShape().empty());
+    shape = llvm::SmallVector<int64_t>(aty.getShape());
+    ss << "A" << aty.getShape().size();
+    for (auto extent : aty.getShape()) {
+      assert(extent > 0 ||
+             extent == aty.getUnknownExtent() && "Unexpected array extent");
+      if (extent == aty.getUnknownExtent())
+        ss << "_u";
+      else
+        ss << "_" << extent;
+    }
+    ss << "_" << typeScan(aty.getEleTy());
+  } else if (auto dty = mlir::dyn_cast<fir::RecordType>(ty)) {
+    ss << "D" << dty.getName().str();
+  } else if (auto bty = mlir::dyn_cast<fir::BoxType>(ty)) {
+    inBox = true;
+    // allocatable (box<heap<...>>)
+    if (auto hty = mlir::dyn_cast<fir::HeapType>(bty.getEleTy()))
+      ss << "H" << typeScan(hty.getEleTy());
+    // pointer (box<ptr<...>>)
+    else if (auto pty = mlir::dyn_cast<fir::PointerType>(bty.getEleTy()))
+      ss << "P" << typeScan(pty.getEleTy());
+    else
+      unexpectedType();
+  } else if (auto sty = mlir::dyn_cast<fir::CharacterType>(ty)) {
+    // character -> s<kind>l<len>
+    fir::CharacterType::LenType len = sty.getLen();
+    assert(len > 0 || len == fir::CharacterType::unknownLen() &&
+                          "Unexpected character length");
+    charLen = len;
+    ss << "s" << sty.getFKind() << "l";
+    if (len == fir::CharacterType::unknownLen())
+      ss << "u";
+    else
+      ss << len;
+  } else if (auto cty = mlir::dyn_cast<fir::ComplexType>(ty)) {
+    ss << "c" << cty.getFKind();
+  } else if (auto lty = mlir::dyn_cast<fir::LogicalType>(ty)) {
+    ss << "l" << lty.getFKind();
+  } else if (ty.isIntOrIndexOrFloat()) {
+    if (ty.isIntOrIndex())
+      ss << "i";
+    else
+      ss << "f";
+    ss << ty.getIntOrFloatBitWidth();
+  } else {
+    unexpectedType();
+  }
+  return ss.str();
+}
+
+// Create a function that performs a copy between two variables, compatible
+// with their types and attributes.
+static mlir::func::FuncOp
+createCopyFunc(mlir::Location loc, Fortran::lower::AbstractConverter &converter,
+               mlir::Type varType, fir::FortranVariableFlagsEnum varAttrs) {
+  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+  mlir::ModuleOp module = builder.getModule();
+  TypeInfo typeInfo(loc,
+                    mlir::cast<fir::ReferenceType>(varType).getElementType());
+  std::string copyFuncName = std::string("_copy_") + typeInfo.getName();
+
+  if (auto decl = module.lookupSymbol<mlir::func::FuncOp>(copyFuncName))
+    return decl;
+
+  // create function
+  mlir::OpBuilder::InsertionGuard guard(builder);
+  mlir::OpBuilder modBuilder(module.getBodyRegion());
+  llvm::SmallVector<mlir::Type> argsTy = {varType, varType};
+  auto funcType = mlir::FunctionType::get(builder.getContext(), argsTy, {});
+  mlir::func::FuncOp funcOp =
+      modBuilder.create<mlir::func::FuncOp>(loc, copyFuncName, funcType);
+  funcOp.setVisibility(mlir::SymbolTable::Visibility::Private);
+  builder.createBlock(&funcOp.getRegion(), funcOp.getRegion().end(), argsTy,
+                      {loc, loc});
+  builder.setInsertionPointToStart(&funcOp.getRegion().back());
+  // generate body
+  fir::FortranVariableFlagsAttr attrs;
+  if (varAttrs != fir::FortranVariableFlagsEnum::None)
+    attrs = fir::FortranVariableFlagsAttr::get(builder.getContext(), varAttrs);
+  llvm::SmallVector<mlir::Value> typeparams;
+  if (typeInfo.getCharLength().has_value()) {
+    mlir::Value charLen = builder.createIntegerConstant(
+        loc, builder.getCharacterLengthType(), *typeInfo.getCharLength());
+    typeparams.push_back(charLen);
+  }
+  mlir::Value shape;
+  if (!typeInfo.isBox() && !typeInfo.getShape().empty()) {
+    llvm::SmallVector<mlir::Value> extents;
+    for (auto extent : typeInfo.getShape())
+      extents.push_back(
+          builder.createIntegerConstant(loc, builder.getIndexType(), extent));
+    shape = builder.create<fir::ShapeOp>(loc, extents);
+  }
+  auto declDst = builder.create<hlfir::DeclareOp>(loc, funcOp.getArgument(0),
+                                                  copyFuncName + "_dst", shape,
+                                                  typeparams, attrs);
+  auto declSrc = builder.create<hlfir::DeclareOp>(loc, funcOp.getArgument(1),
+                                                  copyFuncName + "_src", shape,
+                                                  typeparams, attrs);
+  converter.copyVar(loc, declDst.getBase(), declSrc.getBase());
+  builder.create<mlir::func::ReturnOp>(loc);
+  return funcOp;
+}
+
 static mlir::omp::ScheduleModifier
 translateScheduleModifier(const Fortran::parser::OmpScheduleModifierType &m) {
   switch (m.v) {
@@ -1740,6 +1894,46 @@ bool ClauseProcessor::processCopyin() const {
   return hasCopyin;
 }
 
+bool ClauseProcessor::processCopyPrivate(
+    mlir::Location currentLocation,
+    llvm::SmallVectorImpl<mlir::Value> &copyPrivateVars,
+    llvm::SmallVectorImpl<mlir::Attribute> &copyPrivateFuncs) const {
+  auto addCopyPrivateVar = [&](Fortran::semantics::Symbol *sym) {
+    mlir::Value symVal = converter.getSymbolAddress(*sym);
+    auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>();
+    if (!declOp)
+      fir::emitFatalError(currentLocation,
+                          "COPYPRIVATE is supported only in HLFIR mode");
+    symVal = declOp.getBase();
+    fir::FortranVariableFlagsEnum attrs = fir::FortranVariableFlagsEnum::None;
+    if (declOp.getFortranAttrs().has_value())
+      attrs = *declOp.getFortranAttrs();
+    copyPrivateVars.push_back(symVal);
+    mlir::func::FuncOp funcOp =
+        createCopyFunc(currentLocation, converter, symVal.getType(), attrs);
+    copyPrivateFuncs.push_back(mlir::SymbolRefAttr::get(funcOp));
+  };
+
+  bool hasCopyPrivate = findRepeatableClause<ClauseTy::Copyprivate>(
+      [&](const ClauseTy::Copyprivate *copyPrivateClause,
+          const Fortran::parser::CharBlock &) {
+        const Fortran::parser::OmpObjectList &ompObjectList =
+            copyPrivateClause->v;
+        for (const Fortran::parser::OmpObject &ompObject : ompObjectList.v) {
+          Fortran::semantics::Symbol *sym = getOmpObjectSymbol(ompObject);
+          if (const auto *commonDetails =
+                  sym->detailsIf<Fortran::semantics::CommonBlockDetails>()) {
+            for (const auto &mem : commonDetails->objects())
+              addCopyPrivateVar(&*mem);
+            break;
+          }
+          addCopyPrivateVar(sym);
+        }
+      });
+
+  return hasCopyPrivate;
+}
+
 bool ClauseProcessor::processDepend(
     llvm::SmallVectorImpl<mlir::Attribute> &dependTypeOperands,
     llvm::SmallVectorImpl<mlir::Value> &dependOperands) const {
@@ -2481,19 +2675,26 @@ genSingleOp(Fortran::lower::AbstractConverter &converter,
             const Fortran::parser::OmpClauseList &beginClauseList,
             const Fortran::parser::OmpClauseList &endClauseList) {
   llvm::SmallVector<mlir::Value> allocateOperands, allocatorOperands;
+  llvm::SmallVector<mlir::Value> copyPrivateVars;
+  llvm::SmallVector<mlir::Attribute> copyPrivateFuncs;
   mlir::UnitAttr nowaitAttr;
 
   ClauseProcessor cp(converter, beginClauseList);
   cp.processAllocate(allocatorOperands, allocateOperands);
-  cp.processTODO<Fortran::parser::OmpClause::Copyprivate>(
-      currentLocation, llvm::omp::Directive::OMPD_single);
 
-  ClauseProcessor(converter, endClauseList).processNowait(nowaitAttr);
+  ClauseProcessor ecp(converter, endClauseList);
+  ecp.processNowait(nowaitAttr);
+  ecp.processCopyPrivate(currentLocation, copyPrivateVars, copyPrivateFuncs);
 
   return genOpWithBody<mlir::omp::SingleOp>(
       converter, eval, genNested, currentLocation,
       /*outerCombined=*/false, &beginClauseList, allocateOperands,
-      allocatorOperands, nowaitAttr);
+      allocatorOperands, copyPrivateVars,
+      copyPrivateFuncs.empty()
+          ? nullptr
+          : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(),
+                                 copyPrivateFuncs),
+      nowaitAttr);
 }
 
 static mlir::omp::TaskOp
@@ -3367,7 +3568,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
 
   for (const auto &clause : endClauseList.v) {
     mlir::Location clauseLocation = converter.genLocation(clause.source);
-    if (!std::get_if<Fortran::parser::OmpClause::Nowait>(&clause.u))
+    if (!std::get_if<Fortran::parser::OmpClause::Nowait>(&clause.u) &&
+        !std::get_if<Fortran::parser::OmpClause::Copyprivate>(&clause.u))
       TODO(clauseLocation, "OpenMP Block construct clause");
   }
 
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 2c570bc3abeb20..ef5a01c1ca8215 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -2429,7 +2429,8 @@ void OmpAttributeVisitor::CheckDataCopyingClause(
       // either 'private' or 'threadprivate' in enclosing context.
       if (!checkSymbol->test(Symbol::Flag::OmpThreadprivate) &&
           !(HasSymbolInEnclosingScope(symbol, currScope()) &&
-              symbol.test(Symbol::Flag::OmpPrivate))) {
+              (symbol.test(Symbol::Flag::OmpPrivate) ||
+                  symbol.test(Symbol::Flag::OmpFirstPrivate)))) {
         context_.Say(name.source,
             "COPYPRIVATE variable '%s' is not PRIVATE or THREADPRIVATE in "
             "outer context"_err_en_US,
diff --git a/flang/test/Lower/OpenMP/Todo/copyprivate.f90 b/flang/test/Lower/OpenMP/Todo/copyprivate.f90
deleted file mode 100644
index 0d871427ce60ff..00000000000000
--- a/flang/test/Lower/OpenMP/Todo/copyprivate.f90
+++ /dev/null
@@ -1,13 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: OpenMP Block construct clause
-subroutine sb
-  integer, save :: a
-  !$omp threadprivate(a)
-  !$omp parallel
-  !$omp single
-  a = 3
-  !$omp end single copyprivate(a)
-  !$omp end parallel
-end subroutine
diff --git a/flang/test/Lower/OpenMP/copyprivate.f90 b/flang/test/Lower/OpenMP/copyprivate.f90
new file mode 100644
index 00000000000000..0d2740c77896f0
--- /dev/null
+++ b/flang/test/Lower/OpenMP/copyprivate.f90
@@ -0,0 +1,48 @@
+! Test COPYPRIVATE.
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+!CHECK-DAG: func private @_copy_f32(%{{.*}}: !fir.ref<f32>, %{{.*}}: !fir.ref<f32>)
+!CHECK-DAG: func private @_copy_A1_10_i32(%{{.*}}: !fir.ref<!fir.array<10xi32>>, %{{.*}}: !fir.ref<!fir.array<10xi32>>)
+
+!CHECK-LABEL: func private @_copy_i32(
+!CHECK-SAME:                  %[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>) {
+!CHECK-NEXT:    %[[DST:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_copy_i32_dst"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK-NEXT:    %[[SRC:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_copy_i32_src"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK-NEXT:    %[[SRC_VAL:.*]] = fir.load %[[SRC]]#0 : !fir.ref<i32>
+!CHECK-NEXT:    hlfir.assign %[[SRC_VAL]] to %[[DST]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK-NEXT:    return
+!CHECK-NEXT:  }
+
+!CHECK-LABEL: func @_QPtest_scalar
+!CHECK:         omp.parallel
+!CHECK:           %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:           %[[J:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:           %[[K:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEk"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:           omp.single copyprivate(%[[I]]#0 -> @_copy_i32 : !fir.ref<i32>, %[[J]]#0 -> @_copy_i32 : !fir.ref<i32>, %[[K]]#0 -> @_copy_f32 : !fir.ref<f32>)
+subroutine test_scalar()
+  integer, save :: i, j
+  !$omp threadprivate(i, j)
+  real :: k
+
+  k = 33.3
+  !$omp parallel firstprivate(k)
+  !$omp single
+  i = 11
+  j = 22
+  !$omp end single copyprivate(i, j, k)
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_array
+!CHECK:         omp.parallel
+!CHECK:           %[[A:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+!CHECK:           omp.single copyprivate(%[[A]]#0 -> @_copy_A1_10_i32 : !fir.ref<!fir.array<10xi32>>)
+subroutine test_array()
+  integer :: a(10)
+
+  !$omp parallel private(a)
+  !$omp single
+  a = 100
+  !$omp end single copyprivate(a)
+  !$omp end parallel
+end subroutine
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 669104307fa0e2..96835a6f6edce3 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1819,12 +1819,16 @@ class OpenMPIRBuilder {
   /// \param FiniCB Callback to finalize variable copies.
   /// \param IsNowait If false, a barrier is emitted.
   /// \param DidIt Local variable used as a flag to indicate 'single' thread
+  /// \param CPVars copyprivate variables.
+  /// \param CPFuncs copy functions to use for each copyprivate variable.
   ///
   /// \returns The insertion position *after* the single call.
   InsertPointTy createSingle(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB, bool IsNowait,
-                             llvm::Value *DidIt);
+                             llvm::Value *DidIt,
+                             const SmallVector<llvm::Value *> &CPVars = {},
+                             const SmallVector<llvm::Function *> &CPFuncs = {});
 
   /// Generator for '#omp master'
   ///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index f6cf358119fb71..30cbb105e829ac 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3992,7 +3992,9 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
 
 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle(
     const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
-    FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt) {
+    FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt,
+    const SmallVector<llvm::Value *> &CPVars,
+    const SmallVector<llvm::Function *> &CPFuncs) {
 
   if (!updateToLocation(Loc))
     return Loc.IP;
@@ -4015,17 +4017,33 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle(
   Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
   Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
 
+  auto FiniCBWrapper = [&](InsertPointTy IP) {
+    FiniCB(IP);
+
+    if (DidIt)
+      Builder.CreateStore(Builder.getInt32(1), DidIt);
+  };
+
   // generates the following:
   // if (__kmpc_single()) {
   //		.... single region ...
   // 		__kmpc_end_single
   // }
+  // __kmpc_copyprivate
   // __kmpc_barrier
 
-  EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
+  EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
                        /*Conditional*/ true,
                        /*hasFinalize*/ true);
-  if (!IsNowait)
+
+  if (DidIt) {
+    for (size_t I = 0, E = CPVars.size(); I < E; ++I)
+      // NOTE BufSize is currently unused, so just pass 0.
+      createCopyPrivate(LocationDescription(Builder.saveIP(), Loc.DL),
+                        /*BufSize=*/ConstantInt::get(Int64, 0), CPVars[I],
+                        CPFuncs[I], DidIt);
+    // NOTE __kmpc_copyprivate already inserts a barrier
+  } else if (!IsNowait)
     createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
                   omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
                   /* CheckCancelFlag */ false);
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index d614f2666a85ab..088327c35b44f2 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -387,10 +387,16 @@ def SingleOp : OpenMP_Op<"single", [AttrSizedOperandSegments]> {
     master thread), in the context of its implicit task. The other threads
     in the team, which do not execute the block, wait at an implicit barrier
     at the end of the single construct unless a nowait clause is specified.
+
+    If copyprivate variables and functions are specified, then each thread
+    variable is updated with the variable value of the thread that executed
+    the single region, using the specified copy functions.
   }];
 
   let arguments = (ins Variadic<AnyType>:$allocate_vars,
                        Variadic<AnyType>:$allocators_vars,
+                       Variadic<OpenMP_PointerLikeType>:$copyprivate_vars,
+                       OptionalAttr<SymbolRefArrayAttr>:$copyprivate_funcs,
                        UnitAttr:$nowait);
 
   let regions = (region AnyRegion:$region);
@@ -402,6 +408,10 @@ def SingleOp : OpenMP_Op<"single", [AttrSizedOperandSegments]> {
                 $allocators_vars, type($allocators_vars)
               ) `)`
           |`nowait` $nowait
+          |`copyprivate` `(`
+              custom<CopyPrivateVarList>(
+                $copyprivate_vars, type($copyprivate_vars), $copyprivate_funcs
+              ) `)`
     ) $region attr-dict
   }];
   let hasVerifier = 1;
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 2d3be76c65e817..54f71fcdd7e351 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -505,6 +505,108 @@ static LogicalResult verifyReductionVarList(Operation *op,
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// Parser, printer and verifier for CopyPrivateVarList
+//===----------------------------------------------------------------------===//
+
+/// copyprivate-entry-list ::= copyprivate-entry
+///                          | copyprivate-entry-list `,` copyprivate-entry
+/// copyprivate-entry ::= ssa-id `->` symbol-ref `:` type
+static ParseResult parseCopyPrivateVarList(
+    OpAsmParser &parser,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &operands,
+    SmallVectorImpl<Type> &types, ArrayAttr &copyPrivateSymbols) {
+  SmallVector<SymbolRefAttr> copyPrivateFuncsVec; // one symbol per entry
+  if (failed(parser.parseCommaSeparatedList([&]() {
+        if (parser.parseOperand(operands.emplace_back()) ||
+            parser.parseArrow() || // the `->` between variable and function
+            parser.parseAttribute(copyPrivateFuncsVec.emplace_back()) ||
+            parser.parseColonType(types.emplace_back()))
+          return failure();
+        return success();
+      })))
+    return failure(); // an entry failed to parse
+  SmallVector<Attribute> copyPrivateFuncs(copyPrivateFuncsVec.begin(),
+                                          copyPrivateFuncsVec.end());
+  copyPrivateSymbols = ArrayAttr::get(parser.getContext(), copyPrivateFuncs);
+  return success();
+}
+
+/// Prints each copyprivate entry as `var -> func : type`, comma-separated.
+static void printCopyPrivateVarList(OpAsmPrinter &p, Operation *op,
+                                    OperandRange copyPrivateVars,
+                                    TypeRange copyPrivateTypes,
+                                    std::optional<ArrayAttr> copyPrivateFuncs) {
+  assert(copyPrivateFuncs.has_value() || copyPrivateVars.empty());
+  for (unsigned i = 0, e = copyPrivateVars.size(); i < e; ++i) {
+    if (i != 0) // separator goes between entries, not before the first
+      p << ", ";
+    p << copyPrivateVars[i] << " -> " << (*copyPrivateFuncs)[i] << " : "
+      << copyPrivateTypes[i];
+  }
+}
+
+/// Verifies that each copyprivate var has a matching 2-argument copy function.
+static LogicalResult
+verifyCopyPrivateVarList(Operation *op, OperandRange copyPrivateVars,
+                         std::optional<ArrayAttr> copyPrivateFuncs) {
+  if (!copyPrivateVars.empty()) {
+    if (!copyPrivateFuncs || copyPrivateFuncs->size() != copyPrivateVars.size())
+      return op->emitOpError() << "expected as many copyPrivate functions as "
+                                  "copyPrivate variables";
+  } else {
+    if (copyPrivateFuncs)
+      return op->emitOpError() << "unexpected copyPrivate functions";
+    return success(); // no copyprivate variables: nothing else to check
+  }
+
+  for (auto args : llvm::zip(copyPrivateVars, *copyPrivateFuncs)) {
+    auto symbolRef = llvm::cast<SymbolRefAttr>(std::get<1>(args));
+    std::optional<std::variant<mlir::func::FuncOp, mlir::LLVM::LLVMFuncOp>>
+        funcOp;
+    if (mlir::func::FuncOp mlirFuncOp =
+            SymbolTable::lookupNearestSymbolFrom<mlir::func::FuncOp>(op,
+                                                                     symbolRef))
+      funcOp = mlirFuncOp;
+    else if (mlir::LLVM::LLVMFuncOp llvmFuncOp =
+                 SymbolTable::lookupNearestSymbolFrom<mlir::LLVM::LLVMFuncOp>(
+                     op, symbolRef))
+      funcOp = llvmFuncOp;
+
+    auto getNumArguments = [&] {
+      return std::visit([](auto &f) { return f.getArguments().size(); },
+                        *funcOp);
+    };
+
+    auto getArgumentType = [&](unsigned i) {
+      return std::visit([i](auto &f) { return f.getArgument(i).getType(); },
+                        *funcOp);
+    };
+
+    if (!funcOp) // neither a func::FuncOp nor an LLVM::LLVMFuncOp was found
+      return op->emitOpError() << "expected symbol reference " << symbolRef
+                               << " to point to a copy function";
+
+    if (getNumArguments() != 2)
+      return op->emitOpError()
+             << "expected copy function " << symbolRef << " to have 2 operands";
+
+    Type argTy = getArgumentType(0);
+    if (argTy != getArgumentType(1))
+      return op->emitOpError() << "expected copy function " << symbolRef
+                               << " arguments to have the same type";
+
+    Type varType = std::get<0>(args).getType();
+    if (argTy != varType)
+      return op->emitOpError()
+             << "expected copy function arguments' type (" << argTy
+             << ") to be the same as copyprivate variable's type (" << varType
+             << ")";
+  }
+
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // Parser, printer and verifier for DependVarList
 //===----------------------------------------------------------------------===//
@@ -1072,7 +1174,8 @@ LogicalResult SingleOp::verify() {
     return emitError(
         "expected equal sizes for allocate and allocator variables");
 
-  return success();
+  return verifyCopyPrivateVarList(*this, getCopyprivateVars(),
+                                  getCopyprivateFuncs());
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 23e101f1e45272..964a1aeb5a00dd 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -656,8 +656,26 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
                         moduleTranslation, bodyGenStatus);
   };
   auto finiCB = [&](InsertPointTy codeGenIP) {};
+
+  // Handle copyprivate
+  Operation::operand_range cpVars = singleOp.getCopyprivateVars();
+  std::optional<ArrayAttr> cpFuncs = singleOp.getCopyprivateFuncs();
+  llvm::SmallVector<llvm::Value *> llvmCPVars;
+  llvm::SmallVector<llvm::Function *> llvmCPFuncs;
+  for (size_t i = 0, e = cpVars.size(); i < e; ++i) {
+    llvmCPVars.push_back(moduleTranslation.lookupValue(cpVars[i]));
+    auto llvmFuncOp = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
+        singleOp, cast<SymbolRefAttr>((*cpFuncs)[i]));
+    llvmCPFuncs.push_back(
+        moduleTranslation.lookupFunction(llvmFuncOp.getName()));
+  }
+  llvm::Value *didIt = nullptr;
+  if (!llvmCPVars.empty())
+    didIt = builder.CreateAlloca(llvm::Type::getInt32Ty(builder.getContext()));
+
   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle(
-      ompLoc, bodyCB, finiCB, singleOp.getNowait(), /*DidIt=*/nullptr));
+      ompLoc, bodyCB, finiCB, singleOp.getNowait(), didIt, llvmCPVars,
+      llvmCPFuncs));
   return bodyGenStatus;
 }
 

>From 3a5e062577d72b24fd9fb43cb8744ba9890d799c Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Wed, 20 Dec 2023 15:35:38 -0300
Subject: [PATCH 2/8] Fix mlir/test/Dialect/OpenMP/invalid.mlir

---
 mlir/test/Dialect/OpenMP/invalid.mlir | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 2b0e86ddd22bbc..12542016126248 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -1284,7 +1284,7 @@ func.func @omp_single(%data_var : memref<i32>) -> () {
   // expected-error @below {{expected equal sizes for allocate and allocator variables}}
   "omp.single" (%data_var) ({
     omp.barrier
-  }) {operandSegmentSizes = array<i32: 1,0>} : (memref<i32>) -> ()
+  }) {operandSegmentSizes = array<i32: 1,0,0>} : (memref<i32>) -> ()
   return
 }
 

>From 029c36432e6e454e5088b38cbc793a426569e825 Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Thu, 21 Dec 2023 12:54:39 -0300
Subject: [PATCH 3/8] Use getTypeAsString() to build copy function name

---
 flang/lib/Lower/OpenMP.cpp              | 91 ++++++-------------------
 flang/test/Lower/OpenMP/copyprivate.f90 |  4 +-
 2 files changed, 21 insertions(+), 74 deletions(-)

diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 12d68faed856ca..58b128fd8f0d8c 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -22,6 +22,7 @@
 #include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Parser/dump-parse-tree.h"
 #include "flang/Parser/parse-tree.h"
@@ -1167,13 +1168,7 @@ class ReductionProcessor {
 /// Class that extracts information from the specified type.
 class TypeInfo {
 public:
-  TypeInfo(mlir::Location loc, mlir::Type ty) : loc(loc) {
-    name = typeScan(ty);
-  }
-
-  // Returns a textual representation of the type, with characters that are
-  // valid in identifiers.
-  const std::string &getName() const { return name; }
+  TypeInfo(mlir::Type ty) { typeScan(ty); }
 
   // Returns the length of character types.
   std::optional<fir::CharacterType::LenType> getCharLength() const {
@@ -1187,77 +1182,28 @@ class TypeInfo {
   bool isBox() const { return inBox; }
 
 private:
-  // Scan type and return an unique name for it.
-  std::string typeScan(mlir::Type type);
+  void typeScan(mlir::Type type);
 
-  mlir::Location loc;
-  std::string name;
   std::optional<fir::CharacterType::LenType> charLen;
   llvm::SmallVector<int64_t> shape;
   bool inBox = false;
 };
 
-std::string TypeInfo::typeScan(mlir::Type ty) {
-  std::ostringstream ss;
-
-  auto unexpectedType = [&] {
-    std::string errmsg;
-    llvm::raw_string_ostream rss(errmsg);
-    rss << "Unexpected type: " << ty;
-    fir::emitFatalError(loc, errmsg);
-  };
-
-  if (auto aty = mlir::dyn_cast<fir::SequenceType>(ty)) {
-    // array -> A<rank>(_<extent>)+_<eleTy>
-    assert(shape.empty() && !aty.getShape().empty());
-    shape = llvm::SmallVector<int64_t>(aty.getShape());
-    ss << "A" << aty.getShape().size();
-    for (auto extent : aty.getShape()) {
-      assert(extent > 0 ||
-             extent == aty.getUnknownExtent() && "Unexpected array extent");
-      if (extent == aty.getUnknownExtent())
-        ss << "_u";
-      else
-        ss << "_" << extent;
-    }
-    ss << "_" << typeScan(aty.getEleTy());
-  } else if (auto dty = mlir::dyn_cast<fir::RecordType>(ty)) {
-    ss << "D" << dty.getName().str();
+void TypeInfo::typeScan(mlir::Type ty) {
+  if (auto sty = mlir::dyn_cast<fir::SequenceType>(ty)) {
+    assert(shape.empty() && !sty.getShape().empty());
+    shape = llvm::SmallVector<int64_t>(sty.getShape());
+    typeScan(sty.getEleTy());
   } else if (auto bty = mlir::dyn_cast<fir::BoxType>(ty)) {
     inBox = true;
-    // allocatable (box<heap<...>>)
-    if (auto hty = mlir::dyn_cast<fir::HeapType>(bty.getEleTy()))
-      ss << "H" << typeScan(hty.getEleTy());
-    // pointer (box<ptr<...>>)
-    else if (auto pty = mlir::dyn_cast<fir::PointerType>(bty.getEleTy()))
-      ss << "P" << typeScan(pty.getEleTy());
-    else
-      unexpectedType();
-  } else if (auto sty = mlir::dyn_cast<fir::CharacterType>(ty)) {
-    // character -> s<kind>l<len>
-    fir::CharacterType::LenType len = sty.getLen();
-    assert(len > 0 || len == fir::CharacterType::unknownLen() &&
-                          "Unexpected character length");
-    charLen = len;
-    ss << "s" << sty.getFKind() << "l";
-    if (len == fir::CharacterType::unknownLen())
-      ss << "u";
-    else
-      ss << len;
-  } else if (auto cty = mlir::dyn_cast<fir::ComplexType>(ty)) {
-    ss << "c" << cty.getFKind();
-  } else if (auto lty = mlir::dyn_cast<fir::LogicalType>(ty)) {
-    ss << "l" << lty.getFKind();
-  } else if (ty.isIntOrIndexOrFloat()) {
-    if (ty.isIntOrIndex())
-      ss << "i";
-    else
-      ss << "f";
-    ss << ty.getIntOrFloatBitWidth();
-  } else {
-    unexpectedType();
+    typeScan(bty.getEleTy());
+  } else if (auto cty = mlir::dyn_cast<fir::CharacterType>(ty)) {
+    charLen = cty.getLen();
+  } else if (auto hty = mlir::dyn_cast<fir::HeapType>(ty)) {
+    typeScan(hty.getEleTy());
+  } else if (auto pty = mlir::dyn_cast<fir::PointerType>(ty)) {
+    typeScan(pty.getEleTy());
   }
-  return ss.str();
 }
 
 // Create a function that performs a copy between two variables, compatible
@@ -1267,9 +1213,10 @@ createCopyFunc(mlir::Location loc, Fortran::lower::AbstractConverter &converter,
                mlir::Type varType, fir::FortranVariableFlagsEnum varAttrs) {
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   mlir::ModuleOp module = builder.getModule();
-  TypeInfo typeInfo(loc,
-                    mlir::cast<fir::ReferenceType>(varType).getElementType());
-  std::string copyFuncName = std::string("_copy_") + typeInfo.getName();
+  mlir::Type eleTy = mlir::cast<fir::ReferenceType>(varType).getEleTy();
+  TypeInfo typeInfo(eleTy);
+  std::string copyFuncName =
+      fir::getTypeAsString(eleTy, builder.getKindMap(), "_copy");
 
   if (auto decl = module.lookupSymbol<mlir::func::FuncOp>(copyFuncName))
     return decl;
diff --git a/flang/test/Lower/OpenMP/copyprivate.f90 b/flang/test/Lower/OpenMP/copyprivate.f90
index 0d2740c77896f0..bce4e07c931210 100644
--- a/flang/test/Lower/OpenMP/copyprivate.f90
+++ b/flang/test/Lower/OpenMP/copyprivate.f90
@@ -2,7 +2,7 @@
 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
 
 !CHECK-DAG: func private @_copy_f32(%{{.*}}: !fir.ref<f32>, %{{.*}}: !fir.ref<f32>)
-!CHECK-DAG: func private @_copy_A1_10_i32(%{{.*}}: !fir.ref<!fir.array<10xi32>>, %{{.*}}: !fir.ref<!fir.array<10xi32>>)
+!CHECK-DAG: func private @_copy_10xi32(%{{.*}}: !fir.ref<!fir.array<10xi32>>, %{{.*}}: !fir.ref<!fir.array<10xi32>>)
 
 !CHECK-LABEL: func private @_copy_i32(
 !CHECK-SAME:                  %[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>) {
@@ -36,7 +36,7 @@ subroutine test_scalar()
 !CHECK-LABEL: func @_QPtest_array
 !CHECK:         omp.parallel
 !CHECK:           %[[A:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
-!CHECK:           omp.single copyprivate(%[[A]]#0 -> @_copy_A1_10_i32 : !fir.ref<!fir.array<10xi32>>)
+!CHECK:           omp.single copyprivate(%[[A]]#0 -> @_copy_10xi32 : !fir.ref<!fir.array<10xi32>>)
 subroutine test_array()
   integer :: a(10)
 

>From a45b09bdf9e230aa81173b64b7c50fc7c1d63c3b Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Thu, 21 Dec 2023 15:35:52 -0300
Subject: [PATCH 4/8] Add integration test

---
 flang/test/Integration/OpenMP/copyprivate.f90 | 55 +++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 flang/test/Integration/OpenMP/copyprivate.f90

diff --git a/flang/test/Integration/OpenMP/copyprivate.f90 b/flang/test/Integration/OpenMP/copyprivate.f90
new file mode 100644
index 00000000000000..dccb156e12531f
--- /dev/null
+++ b/flang/test/Integration/OpenMP/copyprivate.f90
@@ -0,0 +1,55 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+
+!CHECK-LABEL: define void @_copy_i32(
+!CHECK-SAME:                         ptr %[[DST:.*]], ptr %[[SRC:.*]]) {
+!CHECK-NEXT:    %[[SRC_VAL:.*]] = load i32, ptr %[[SRC]]
+!CHECK-NEXT:    store i32 %[[SRC_VAL]], ptr %[[DST]]
+!CHECK-NEXT:    ret void
+!CHECK-NEXT:  }
+
+!CHECK-LABEL: define internal void @test_scalar_..omp_par({{.*}})
+!CHECK:         %[[I:.*]] = alloca i32, i64 1
+!CHECK:         %[[J:.*]] = alloca i32, i64 1
+!CHECK:         %[[DID_IT:.*]] = alloca i32
+!CHECK:         store i32 0, ptr %[[DID_IT]]
+!CHECK:         %[[THREAD_NUM1:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC:.*]])
+!CHECK:         %[[RET:.*]] = call i32 @__kmpc_single({{.*}})
+!CHECK:         %[[NOT_ZERO:.*]] = icmp ne i32 %[[RET]], 0
+!CHECK:         br i1 %[[NOT_ZERO]], label %[[OMP_REGION_BODY:.*]], label %[[OMP_REGION_END:.*]]
+
+!CHECK:       [[OMP_REGION_END]]:
+!CHECK:         %[[THREAD_NUM2:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC:.*]])
+!CHECK:         %[[DID_IT_VAL:.*]] = load i32, ptr %[[DID_IT]]
+!CHECK:         call void @__kmpc_copyprivate(ptr @[[LOC]], i32 %[[THREAD_NUM2]], i64 0, ptr %[[I]], ptr @_copy_i32, i32 %[[DID_IT_VAL]])
+!CHECK:         %[[THREAD_NUM3:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC]])
+!CHECK:         %[[DID_IT_VAL2:.*]] = load i32, ptr %[[DID_IT]]
+!CHECK:         call void @__kmpc_copyprivate(ptr @[[LOC]], i32 %[[THREAD_NUM3]], i64 0, ptr %[[J]], ptr @_copy_i32, i32 %[[DID_IT_VAL2]])
+
+!CHECK:       [[OMP_REGION_BODY]]:
+!CHECK:         br label %[[OMP_SINGLE_REGION:.*]]
+!CHECK:       [[OMP_SINGLE_REGION]]:
+!CHECK:         store i32 11, ptr %[[I]]
+!CHECK:         store i32 22, ptr %[[J]]
+!CHECK:         br label %[[OMP_REGION_CONT3:.*]]
+!CHECK:       [[OMP_REGION_CONT3]]:
+!CHECK:         store i32 1, ptr %[[DID_IT]]
+!CHECK:         call void @__kmpc_end_single(ptr @[[LOC]], i32 %[[THREAD_NUM1]])
+!CHECK:         br label %[[OMP_REGION_END]]
+subroutine test_scalar()
+  integer :: i, j
+
+  !$omp parallel private(i, j)
+  !$omp single
+  i = 11
+  j = 22
+  !$omp end single copyprivate(i, j)
+  !$omp end parallel
+end subroutine

>From d5b44dee70477bb2a2acd8549b080b900357513b Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Thu, 21 Dec 2023 15:45:07 -0300
Subject: [PATCH 5/8] Use ArrayRef in OMPIRBuilder's createSingle()

---
 llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 4 ++--
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 96835a6f6edce3..ab92c172c75ae6 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1827,8 +1827,8 @@ class OpenMPIRBuilder {
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB, bool IsNowait,
                              llvm::Value *DidIt,
-                             const SmallVector<llvm::Value *> &CPVars = {},
-                             const SmallVector<llvm::Function *> &CPFuncs = {});
+                             ArrayRef<llvm::Value *> CPVars = {},
+                             ArrayRef<llvm::Function *> CPFuncs = {});
 
   /// Generator for '#omp master'
   ///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 30cbb105e829ac..a874669298dfdd 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3993,8 +3993,8 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle(
     const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
     FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt,
-    const SmallVector<llvm::Value *> &CPVars,
-    const SmallVector<llvm::Function *> &CPFuncs) {
+    ArrayRef<llvm::Value *> CPVars,
+    ArrayRef<llvm::Function *> CPFuncs) {
 
   if (!updateToLocation(Loc))
     return Loc.IP;

>From 0bc008a78b2dd5cb1cdd0ce3828ed39a9c3c8f29 Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Thu, 21 Dec 2023 15:52:12 -0300
Subject: [PATCH 6/8] clang-format

---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index a874669298dfdd..7abac0f660ef8a 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3993,8 +3993,7 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle(
     const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
     FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt,
-    ArrayRef<llvm::Value *> CPVars,
-    ArrayRef<llvm::Function *> CPFuncs) {
+    ArrayRef<llvm::Value *> CPVars, ArrayRef<llvm::Function *> CPFuncs) {
 
   if (!updateToLocation(Loc))
     return Loc.IP;

>From f2f25b8fdb6414ef147d39df12b2c38eac9d5740 Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Thu, 4 Jan 2024 16:44:25 -0300
Subject: [PATCH 7/8] Add missing tests

---
 flang/test/Integration/OpenMP/copyprivate.f90 |  39 ++++++
 flang/test/Lower/OpenMP/copyprivate.f90       | 130 ++++++++++++++++--
 .../Frontend/OpenMPIRBuilderTest.cpp          | 111 +++++++++++++++
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |   5 +-
 mlir/test/Dialect/OpenMP/invalid.mlir         |  46 +++++++
 mlir/test/Dialect/OpenMP/ops.mlir             |  17 +++
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      |  32 +++++
 7 files changed, 366 insertions(+), 14 deletions(-)

diff --git a/flang/test/Integration/OpenMP/copyprivate.f90 b/flang/test/Integration/OpenMP/copyprivate.f90
index dccb156e12531f..eb8b46e64e5b7d 100644
--- a/flang/test/Integration/OpenMP/copyprivate.f90
+++ b/flang/test/Integration/OpenMP/copyprivate.f90
@@ -8,6 +8,21 @@
 
 !RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
 
+!CHECK-DAG: define void @_copy_10xi32(ptr %{{.*}}, ptr %{{.*}})
+!CHECK-DAG: define void @_copy_i64(ptr %{{.*}}, ptr %{{.*}})
+!CHECK-DAG: define void @_copy_f32(ptr %{{.*}}, ptr %{{.*}})
+!CHECK-DAG: define void @_copy_2x3xf32(ptr %{{.*}}, ptr %{{.*}})
+!CHECK-DAG: define void @_copy_z32(ptr %{{.*}}, ptr %{{.*}})
+!CHECK-DAG: define void @_copy_10xz32(ptr %{{.*}}, ptr %{{.*}})
+!CHECK-DAG: define void @_copy_l32(ptr %{{.*}}, ptr %{{.*}})
+!CHECK-DAG: define void @_copy_5xl32(ptr %{{.*}}, ptr %{{.*}})
+!CHECK-DAG: define void @_copy_c8x8(ptr %{{.*}}, ptr %{{.*}})
+!CHECK-DAG: define void @_copy_10xc8x8(ptr %{{.*}}, ptr %{{.*}})
+!CHECK-DAG: define void @_copy_c16x5(ptr %{{.*}}, ptr %{{.*}})
+!CHECK-DAG: define void @_copy_rec__QFtest_typesTdt(ptr %{{.*}}, ptr %{{.*}})
+!CHECK-DAG: define void @_copy_box_heap_Uxi32(ptr %{{.*}}, ptr %{{.*}})
+!CHECK-DAG: define void @_copy_box_ptr_Uxc8x9(ptr %{{.*}}, ptr %{{.*}})
+
 !CHECK-LABEL: define void @_copy_i32(
 !CHECK-SAME:                         ptr %[[DST:.*]], ptr %[[SRC:.*]]) {
 !CHECK-NEXT:    %[[SRC_VAL:.*]] = load i32, ptr %[[SRC]]
@@ -53,3 +68,27 @@ subroutine test_scalar()
   !$omp end single copyprivate(i, j)
   !$omp end parallel
 end subroutine
+
+subroutine test_types()
+  integer(4) :: i4, i4a(10)
+  integer(8) :: i8
+  real :: r, ra(2, 3)
+  complex :: z, za(10)
+  logical :: l, la(5)
+  character(kind=1, len=8) :: c1, c1a(10)
+  character(kind=2, len=5) :: c2
+
+  type dt
+    integer :: i
+    real :: r
+  end type
+  type(dt) :: t
+
+  integer, allocatable :: aloc(:)
+  character(kind=1, len=9), pointer :: ptr(:)
+
+  !$omp parallel private(i4, i4a, i8, r, ra, z, za, l, la, c1, c1a, c2, t, aloc, ptr)
+  !$omp single
+  !$omp end single copyprivate(i4, i4a, i8, r, ra, z, za, l, la, c1, c1a, c2, t, aloc, ptr)
+  !$omp end parallel
+end subroutine
diff --git a/flang/test/Lower/OpenMP/copyprivate.f90 b/flang/test/Lower/OpenMP/copyprivate.f90
index bce4e07c931210..4c90b8c2108fc8 100644
--- a/flang/test/Lower/OpenMP/copyprivate.f90
+++ b/flang/test/Lower/OpenMP/copyprivate.f90
@@ -1,8 +1,32 @@
 ! Test COPYPRIVATE.
 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
 
+!CHECK-DAG: func private @_copy_i64(%{{.*}}: !fir.ref<i64>, %{{.*}}: !fir.ref<i64>)
 !CHECK-DAG: func private @_copy_f32(%{{.*}}: !fir.ref<f32>, %{{.*}}: !fir.ref<f32>)
+!CHECK-DAG: func private @_copy_f64(%{{.*}}: !fir.ref<f64>, %{{.*}}: !fir.ref<f64>)
+!CHECK-DAG: func private @_copy_z32(%{{.*}}: !fir.ref<!fir.complex<4>>, %{{.*}}: !fir.ref<!fir.complex<4>>)
+!CHECK-DAG: func private @_copy_z64(%{{.*}}: !fir.ref<!fir.complex<8>>, %{{.*}}: !fir.ref<!fir.complex<8>>)
+!CHECK-DAG: func private @_copy_l32(%{{.*}}: !fir.ref<!fir.logical<4>>, %{{.*}}: !fir.ref<!fir.logical<4>>)
+!CHECK-DAG: func private @_copy_l64(%{{.*}}: !fir.ref<!fir.logical<8>>, %{{.*}}: !fir.ref<!fir.logical<8>>)
+!CHECK-DAG: func private @_copy_c8x3(%{{.*}}: !fir.ref<!fir.char<1,3>>, %{{.*}}: !fir.ref<!fir.char<1,3>>)
+!CHECK-DAG: func private @_copy_c8x8(%{{.*}}: !fir.ref<!fir.char<1,8>>, %{{.*}}: !fir.ref<!fir.char<1,8>>)
+!CHECK-DAG: func private @_copy_c16x8(%{{.*}}: !fir.ref<!fir.char<2,8>>, %{{.*}}: !fir.ref<!fir.char<2,8>>)
+
 !CHECK-DAG: func private @_copy_10xi32(%{{.*}}: !fir.ref<!fir.array<10xi32>>, %{{.*}}: !fir.ref<!fir.array<10xi32>>)
+!CHECK-DAG: func private @_copy_3x4xi32(%{{.*}}: !fir.ref<!fir.array<3x4xi32>>, %{{.*}}: !fir.ref<!fir.array<3x4xi32>>)
+!CHECK-DAG: func private @_copy_10xf32(%{{.*}}: !fir.ref<!fir.array<10xf32>>, %{{.*}}: !fir.ref<!fir.array<10xf32>>)
+!CHECK-DAG: func private @_copy_3x4xz32(%{{.*}}: !fir.ref<!fir.array<3x4x!fir.complex<4>>>, %{{.*}}: !fir.ref<!fir.array<3x4x!fir.complex<4>>>)
+!CHECK-DAG: func private @_copy_10xl32(%{{.*}}: !fir.ref<!fir.array<10x!fir.logical<4>>>, %{{.*}}: !fir.ref<!fir.array<10x!fir.logical<4>>>)
+!CHECK-DAG: func private @_copy_3xc8x8(%{{.*}}: !fir.ref<!fir.array<3x!fir.char<1,8>>>, %{{.*}}: !fir.ref<!fir.array<3x!fir.char<1,8>>>)
+!CHECK-DAG: func private @_copy_3xc16x5(%{{.*}}: !fir.ref<!fir.array<3x!fir.char<2,5>>>, %{{.*}}: !fir.ref<!fir.array<3x!fir.char<2,5>>>)
+
+!CHECK-DAG: func private @_copy_rec__QFtest_dtTdt(%{{.*}}: !fir.ref<!fir.type<_QFtest_dtTdt{i:i32,r:f32}>>, %{{.*}}: !fir.ref<!fir.type<_QFtest_dtTdt{i:i32,r:f32}>>)
+!CHECK-DAG: func private @_copy_box_heap_Uxi32(%{{.*}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+!CHECK-DAG: func private @_copy_box_heap_i32(%{{.*}}: !fir.ref<!fir.box<!fir.heap<i32>>>, %{{.*}}: !fir.ref<!fir.box<!fir.heap<i32>>>)
+!CHECK-DAG: func private @_copy_box_ptr_i32(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<i32>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<i32>>>)
+!CHECK-DAG: func private @_copy_box_ptr_Uxf32(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
+!CHECK-DAG: func private @_copy_box_heap_Uxc8x5(%{{.*}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>)
+!CHECK-DAG: func private @_copy_box_ptr_Uxc8x9(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>)
 
 !CHECK-LABEL: func private @_copy_i32(
 !CHECK-SAME:                  %[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>) {
@@ -13,13 +37,13 @@
 !CHECK-NEXT:    return
 !CHECK-NEXT:  }
 
-!CHECK-LABEL: func @_QPtest_scalar
+!CHECK-LABEL: func @_QPtest_tp
 !CHECK:         omp.parallel
-!CHECK:           %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:           %[[J:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:           %[[K:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEk"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:           %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_tpEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:           %[[J:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_tpEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:           %[[K:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_tpEk"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 !CHECK:           omp.single copyprivate(%[[I]]#0 -> @_copy_i32 : !fir.ref<i32>, %[[J]]#0 -> @_copy_i32 : !fir.ref<i32>, %[[K]]#0 -> @_copy_f32 : !fir.ref<f32>)
-subroutine test_scalar()
+subroutine test_tp()
   integer, save :: i, j
   !$omp threadprivate(i, j)
   real :: k
@@ -33,16 +57,100 @@ subroutine test_scalar()
   !$omp end parallel
 end subroutine
 
+!CHECK-LABEL: func @_QPtest_scalar
+!CHECK:         omp.parallel
+!CHECK:           %[[I1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi2"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi3"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEr1"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:           %[[R2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEr2"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEc1"} : (!fir.ref<!fir.complex<4>>) -> (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<4>>)
+!CHECK:           %[[C2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEc2"} : (!fir.ref<!fir.complex<8>>) -> (!fir.ref<!fir.complex<8>>, !fir.ref<!fir.complex<8>>)
+!CHECK:           %[[L1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEl1"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+!CHECK:           %[[L2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEl2"} : (!fir.ref<!fir.logical<8>>) -> (!fir.ref<!fir.logical<8>>, !fir.ref<!fir.logical<8>>)
+!CHECK:           %[[S1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs1"} : (!fir.ref<!fir.char<1,3>>, index) -> (!fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,3>>)
+!CHECK:           %[[S2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs2"} : (!fir.ref<!fir.char<1,8>>, index) -> (!fir.ref<!fir.char<1,8>>, !fir.ref<!fir.char<1,8>>)
+!CHECK:           %[[S3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs3"} : (!fir.ref<!fir.char<2,8>>, index) -> (!fir.ref<!fir.char<2,8>>, !fir.ref<!fir.char<2,8>>)
+!CHECK:           omp.single copyprivate(%[[I1]]#0 -> @_copy_i32 : !fir.ref<i32>, %[[I2]]#0 -> @_copy_i64 : !fir.ref<i64>, %[[I3]]#0 -> @_copy_i64 : !fir.ref<i64>, %[[R1]]#0 -> @_copy_f32 : !fir.ref<f32>, %[[R2]]#0 -> @_copy_f64 : !fir.ref<f64>, %[[C1]]#0 -> @_copy_z32 : !fir.ref<!fir.complex<4>>, %[[C2]]#0 -> @_copy_z64 : !fir.ref<!fir.complex<8>>, %[[L1]]#0 -> @_copy_l32 : !fir.ref<!fir.logical<4>>, %[[L2]]#0 -> @_copy_l64 : !fir.ref<!fir.logical<8>>, %[[S1]]#0 -> @_copy_c8x3 : !fir.ref<!fir.char<1,3>>, %[[S2]]#0 -> @_copy_c8x8 : !fir.ref<!fir.char<1,8>>, %[[S3]]#0 -> @_copy_c16x8 : !fir.ref<!fir.char<2,8>>)
+subroutine test_scalar()
+  integer(4) :: i1
+  integer(8) :: i2, i3
+  real(4) :: r1
+  real(8) :: r2
+  complex(4) :: c1
+  complex(8) :: c2
+  logical(4) :: l1
+  logical(8) :: l2
+  character(kind=1, len=3) :: s1
+  character(kind=1, len=8) :: s2
+  character(kind=2, len=8) :: s3
+
+  !$omp parallel private(i1, i2, i3, r1, r2, c1, c2, l1, l2, s1, s2, s3)
+  !$omp single
+  !$omp end single copyprivate(i1, i2, i3, r1, r2, c1, c2, l1, l2, s1, s2, s3)
+  !$omp end parallel
+end subroutine
+
 !CHECK-LABEL: func @_QPtest_array
 !CHECK:         omp.parallel
-!CHECK:           %[[A:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
-!CHECK:           omp.single copyprivate(%[[A]]#0 -> @_copy_10xi32 : !fir.ref<!fir.array<10xi32>>)
+!CHECK:           %[[I1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi1"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi2"} : (!fir.ref<!fir.array<3x4xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4xi32>>, !fir.ref<!fir.array<3x4xi32>>)
+!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEr1"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
+!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEc1"} : (!fir.ref<!fir.array<3x4x!fir.complex<4>>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4x!fir.complex<4>>>, !fir.ref<!fir.array<3x4x!fir.complex<4>>>)
+!CHECK:           %[[L1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEl1"} : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.ref<!fir.array<10x!fir.logical<4>>>)
+!CHECK:           %[[S1:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_arrayEs1"} : (!fir.ref<!fir.array<3x!fir.char<1,8>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<1,8>>>, !fir.ref<!fir.array<3x!fir.char<1,8>>>)
+!CHECK:           %[[S2:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_arrayEs2"} : (!fir.ref<!fir.array<3x!fir.char<2,5>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<2,5>>>, !fir.ref<!fir.array<3x!fir.char<2,5>>>)
+!CHECK:           omp.single copyprivate(%[[I1]]#0 -> @_copy_10xi32 : !fir.ref<!fir.array<10xi32>>, %[[I2]]#0 -> @_copy_3x4xi32 : !fir.ref<!fir.array<3x4xi32>>,  %[[R1]]#0 -> @_copy_10xf32 : !fir.ref<!fir.array<10xf32>>, %[[C1]]#0 -> @_copy_3x4xz32 : !fir.ref<!fir.array<3x4x!fir.complex<4>>>, %[[L1]]#0 -> @_copy_10xl32 : !fir.ref<!fir.array<10x!fir.logical<4>>>, %[[S1]]#0 -> @_copy_3xc8x8 : !fir.ref<!fir.array<3x!fir.char<1,8>>>, %[[S2]]#0 -> @_copy_3xc16x5 : !fir.ref<!fir.array<3x!fir.char<2,5>>>)
 subroutine test_array()
-  integer :: a(10)
+  integer :: i1(10), i2(3, 4)
+  real :: r1(10)
+  complex :: c1(3, 4)
+  logical :: l1(10)
+  character(8) :: s1(3)
+  character(kind=2, len=5) :: s2(3)
+
+  !$omp parallel private(i1, i2, r1, c1, l1, s1, s2)
+  !$omp single
+  !$omp end single copyprivate(i1, i2, r1, c1, l1, s1, s2)
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_dt
+!CHECK:         omp.parallel
+!CHECK:           %[[T:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_dtEt"} : (!fir.ref<!fir.type<_QFtest_dtTdt{i:i32,r:f32}>>) -> (!fir.ref<!fir.type<_QFtest_dtTdt{i:i32,r:f32}>>, !fir.ref<!fir.type<_QFtest_dtTdt{i:i32,r:f32}>>)
+!CHECK:           omp.single copyprivate(%[[T]]#0 -> @_copy_rec__QFtest_dtTdt : !fir.ref<!fir.type<_QFtest_dtTdt{i:i32,r:f32}>>)
+subroutine test_dt()
+  type dt
+    integer :: i
+    real :: r
+  end type
+  type(dt) :: t
+
+  !$omp parallel private(t)
+  !$omp single
+  !$omp end single copyprivate(t)
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func @_QPtest_attr
+!CHECK:         omp.parallel
+!CHECK:           %[[I1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_attrEi1"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_attrEi2"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
+!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_attrEi3"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
+!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_attrEr1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
+!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_attrEc1"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>)
+!CHECK:           %[[C2:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_attrEc2"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>)
+!CHECK:           omp.single copyprivate(%[[I1]]#0 -> @_copy_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %[[I2]]#0 -> @_copy_box_heap_i32 : !fir.ref<!fir.box<!fir.heap<i32>>>, %[[I3]]#0 -> @_copy_box_ptr_i32 : !fir.ref<!fir.box<!fir.ptr<i32>>>, %[[R1]]#0 -> @_copy_box_ptr_Uxf32 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>,  %[[C1]]#0 -> @_copy_box_heap_Uxc8x5 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>, %[[C2]]#0 -> @_copy_box_ptr_Uxc8x9 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>)
+subroutine test_attr()
+  integer, allocatable :: i1(:)
+  integer, allocatable :: i2
+  integer, pointer :: i3
+  real, pointer :: r1(:)
+  character(kind=1, len=5), allocatable :: c1(:)
+  character(kind=1, len=9), pointer :: c2(:)
 
-  !$omp parallel private(a)
+  !$omp parallel private(i1, i2, i3, r1, c1, c2)
   !$omp single
-  a = 100
-  !$omp end single copyprivate(a)
+  !$omp end single copyprivate(i1, i2, i3, r1, c1, c2)
   !$omp end parallel
 end subroutine
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index e79d0bb2f65aea..0eb1039aa442ce 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -3464,6 +3464,117 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) {
   EXPECT_EQ(ExitBarrier, nullptr);
 }
 
+TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  AllocaInst *PrivAI = nullptr;
+
+  BasicBlock *EntryBB = nullptr;
+  BasicBlock *ThenBB = nullptr;
+
+  Value *CPVar = Builder.CreateAlloca(F->arg_begin()->getType());
+  Builder.CreateStore(F->arg_begin(), CPVar);
+
+  FunctionType *CopyFuncTy = FunctionType::get(
+      Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getPtrTy()}, false);
+  Function *CopyFunc =
+      Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M);
+
+  Value *DidIt = Builder.CreateAlloca(Type::getInt32Ty(Builder.getContext()));
+
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+    if (AllocaIP.isSet())
+      Builder.restoreIP(AllocaIP);
+    else
+      Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
+    PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
+    Builder.CreateStore(F->arg_begin(), PrivAI);
+
+    llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
+    llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
+    EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
+
+    Builder.restoreIP(CodeGenIP);
+
+    // collect some info for checks later
+    ThenBB = Builder.GetInsertBlock();
+    EntryBB = ThenBB->getUniquePredecessor();
+
+    // simple instructions for body
+    Value *PrivLoad =
+        Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
+    Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
+  };
+
+  auto FiniCB = [&](InsertPointTy IP) {
+    BasicBlock *IPBB = IP.getBlock();
+    EXPECT_NE(IPBB->end(), IP.getPoint());
+  };
+
+  Builder.restoreIP(OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB,
+                                            /*IsNowait*/ false, DidIt, {CPVar},
+                                            {CopyFunc}));
+  Value *EntryBBTI = EntryBB->getTerminator();
+  EXPECT_NE(EntryBBTI, nullptr);
+  EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
+  BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
+  EXPECT_TRUE(EntryBr->isConditional());
+  EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
+  BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
+  EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
+
+  CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
+  EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
+
+  CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
+  EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
+  EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
+  EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
+
+  CallInst *SingleEndCI = nullptr;
+  for (auto &FI : *ThenBB) {
+    Instruction *Cur = &FI;
+    if (isa<CallInst>(Cur)) {
+      SingleEndCI = cast<CallInst>(Cur);
+      if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single")
+        break;
+      SingleEndCI = nullptr;
+    }
+  }
+  EXPECT_NE(SingleEndCI, nullptr);
+  EXPECT_EQ(SingleEndCI->arg_size(), 2U);
+  EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
+  EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
+
+  CallInst *CopyPrivateCI = nullptr;
+  bool FoundBarrier = false;
+  for (auto &FI : *ExitBB) {
+    Instruction *Cur = &FI;
+    if (auto *CI = dyn_cast<CallInst>(Cur)) {
+      if (CI->getCalledFunction()->getName() == "__kmpc_barrier")
+        FoundBarrier = true;
+      else if (CI->getCalledFunction()->getName() == "__kmpc_copyprivate")
+        CopyPrivateCI = CI;
+    }
+  }
+  EXPECT_FALSE(FoundBarrier);
+  EXPECT_NE(CopyPrivateCI, nullptr);
+  EXPECT_EQ(CopyPrivateCI->arg_size(), 6U);
+  EXPECT_TRUE(isa<AllocaInst>(CopyPrivateCI->getArgOperand(3)));
+  EXPECT_EQ(CopyPrivateCI->getArgOperand(3), CPVar);
+  EXPECT_TRUE(isa<Function>(CopyPrivateCI->getArgOperand(4)));
+  EXPECT_EQ(CopyPrivateCI->getArgOperand(4), CopyFunc);
+  EXPECT_TRUE(isa<LoadInst>(CopyPrivateCI->getArgOperand(5)));
+  LoadInst *DidItLI = cast<LoadInst>(CopyPrivateCI->getArgOperand(5));
+  EXPECT_EQ(DidItLI->getOperand(0), DidIt);
+}
+
 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) {
   OpenMPIRBuilder OMPBuilder(*M);
   OMPBuilder.initialize();
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 54f71fcdd7e351..b64391fb248ba1 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -574,12 +574,11 @@ verifyCopyPrivateVarList(Operation *op, OperandRange copyPrivateVars,
       funcOp = llvmFuncOp;
 
     auto getNumArguments = [&] {
-      return std::visit([](auto &f) { return f.getArguments().size(); },
-                        *funcOp);
+      return std::visit([](auto &f) { return f.getNumArguments(); }, *funcOp);
     };
 
     auto getArgumentType = [&](unsigned i) {
-      return std::visit([i](auto &f) { return f.getArgument(i).getType(); },
+      return std::visit([i](auto &f) { return f.getArgumentTypes()[i]; },
                         *funcOp);
     };
 
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 12542016126248..2089cbccc9c2d0 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -1290,6 +1290,52 @@ func.func @omp_single(%data_var : memref<i32>) -> () {
 
 // -----
 
+func.func @omp_single_copyprivate(%data_var : memref<i32>) -> () {
+  // expected-error @below {{expected symbol reference @copy_func to point to a copy function}}
+  omp.single copyprivate(%data_var -> @copy_func : memref<i32>) {
+    omp.barrier
+  }
+  return
+}
+
+// -----
+
+func.func private @copy_func(memref<i32>)
+
+func.func @omp_single_copyprivate(%data_var : memref<i32>) -> () {
+  // expected-error @below {{expected copy function @copy_func to have 2 operands}}
+  omp.single copyprivate(%data_var -> @copy_func : memref<i32>) {
+    omp.barrier
+  }
+  return
+}
+
+// -----
+
+func.func private @copy_func(memref<i32>, memref<f32>)
+
+func.func @omp_single_copyprivate(%data_var : memref<i32>) -> () {
+  // expected-error @below {{expected copy function @copy_func arguments to have the same type}}
+  omp.single copyprivate(%data_var -> @copy_func : memref<i32>) {
+    omp.barrier
+  }
+  return
+}
+
+// -----
+
+func.func private @copy_func(memref<f32>, memref<f32>)
+
+func.func @omp_single_copyprivate(%data_var : memref<i32>) -> () {
+  // expected-error @below {{expected copy function arguments' type ('memref<f32>') to be the same as copyprivate variable's type ('memref<i32>')}}
+  omp.single copyprivate(%data_var -> @copy_func : memref<i32>) {
+    omp.barrier
+  }
+  return
+}
+
+// -----
+
 func.func @omp_task_depend(%data_var: memref<i32>) {
   // expected-error @below {{op expected as many depend values as depend variables}}
     "omp.task"(%data_var) ({
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 3d4f6435572f7f..af7879652a1a92 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -1577,6 +1577,23 @@ func.func @omp_single_multiple_blocks() {
   return
 }
 
+func.func private @copy_i32(memref<i32>, memref<i32>)
+
+// CHECK-LABEL: func @omp_single_copyprivate
+func.func @omp_single_copyprivate(%data_var: memref<i32>) {
+  omp.parallel {
+    // CHECK: omp.single copyprivate(%{{.*}} -> @copy_i32 : memref<i32>) {
+    omp.single copyprivate(%data_var -> @copy_i32 : memref<i32>) {
+      "test.payload"() : () -> ()
+      // CHECK: omp.terminator
+      omp.terminator
+    }
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
 // CHECK-LABEL: @omp_task
 // CHECK-SAME: (%[[bool_var:.*]]: i1, %[[i64_var:.*]]: i64, %[[i32_var:.*]]: i32, %[[data_var:.*]]: memref<i32>)
 func.func @omp_task(%bool_var: i1, %i64_var: i64, %i32_var: i32, %data_var: memref<i32>) {
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 29baa84e7e19d3..8a3d5d6407659d 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -2165,6 +2165,38 @@ llvm.func @single_nowait(%x: i32, %y: i32, %zaddr: !llvm.ptr) {
 
 // -----
 
+llvm.func @copy_i32(!llvm.ptr, !llvm.ptr)
+llvm.func @copy_f32(!llvm.ptr, !llvm.ptr)
+
+// CHECK-LABEL: @single_copyprivate
+// CHECK-SAME: (ptr %[[ip:.*]], ptr %[[fp:.*]])
+llvm.func @single_copyprivate(%ip: !llvm.ptr, %fp: !llvm.ptr) {
+  // CHECK: call i32 @__kmpc_single
+  omp.single copyprivate(%ip -> @copy_i32 : !llvm.ptr, %fp -> @copy_f32 : !llvm.ptr) {
+    // CHECK: %[[i:.*]] = load i32, ptr %[[ip]]
+    %i = llvm.load %ip : !llvm.ptr -> i32
+    // CHECK: %[[i2:.*]] = add i32 %[[i]], %[[i]]
+    %i2 = llvm.add %i, %i : i32
+    // CHECK: store i32 %[[i2]], ptr %[[ip]]
+    llvm.store %i2, %ip : i32, !llvm.ptr
+    // CHECK: %[[f:.*]] = load float, ptr %[[fp]]
+    %f = llvm.load %fp : !llvm.ptr -> f32
+    // CHECK: %[[f2:.*]] = fadd float %[[f]], %[[f]]
+    %f2 = llvm.fadd %f, %f : f32
+    // CHECK: store float %[[f2]], ptr %[[fp]]
+    llvm.store %f2, %fp : f32, !llvm.ptr
+    // CHECK: call void @__kmpc_end_single
+    // CHECK: call void @__kmpc_copyprivate({{.*}}, ptr %[[ip]], ptr @copy_i32, {{.*}})
+    // CHECK: call void @__kmpc_copyprivate({{.*}}, ptr %[[fp]], ptr @copy_f32, {{.*}})
+    // CHECK-NOT: call void @__kmpc_barrier
+    omp.terminator
+  }
+  // CHECK: ret void
+  llvm.return
+}
+
+// -----
+
 // CHECK: @_QFsubEx = internal global i32 undef
 // CHECK: @_QFsubEx.cache = common global ptr null
 

>From ef5a0b74ab8bca7de0114d044533f1c5eadd1ef3 Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Mon, 22 Jan 2024 10:36:28 -0300
Subject: [PATCH 8/8] Add support for vla/assumed-shape arrays

---
 flang/lib/Lower/OpenMP.cpp                    | 26 +++++++++++++++----
 flang/test/Integration/OpenMP/copyprivate.f90 | 11 +++++---
 flang/test/Lower/OpenMP/copyprivate.f90       | 18 +++++++++----
 3 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 58b128fd8f0d8c..7011cfe58f7858 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -1852,12 +1852,28 @@ bool ClauseProcessor::processCopyPrivate(
       fir::emitFatalError(currentLocation,
                           "COPYPRIVATE is supported only in HLFIR mode");
     symVal = declOp.getBase();
-    fir::FortranVariableFlagsEnum attrs = fir::FortranVariableFlagsEnum::None;
-    if (declOp.getFortranAttrs().has_value())
-      attrs = *declOp.getFortranAttrs();
-    copyPrivateVars.push_back(symVal);
+    mlir::Type symType = symVal.getType();
+    fir::FortranVariableFlagsEnum attrs =
+        declOp.getFortranAttrs().has_value()
+            ? *declOp.getFortranAttrs()
+            : fir::FortranVariableFlagsEnum::None;
+    mlir::Value cpVar = symVal;
+
+    // CopyPrivate variables must be passed by reference. However, for
+    // assumed-shape arrays and VLAs the declared value is a !fir.box rather
+    // than a !fir.ref. In those cases, allocate a temporary, store the box
+    // into it, and pass the resulting !fir.ref<!fir.box<...>> as the
+    // copyprivate variable instead.
+    if (mlir::isa<fir::BaseBoxType>(symType)) {
+      fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+      auto alloca = builder.create<fir::AllocaOp>(currentLocation, symType);
+      builder.create<fir::StoreOp>(currentLocation, symVal, alloca);
+      cpVar = alloca;
+    }
+
+    copyPrivateVars.push_back(cpVar);
     mlir::func::FuncOp funcOp =
-        createCopyFunc(currentLocation, converter, symVal.getType(), attrs);
+        createCopyFunc(currentLocation, converter, cpVar.getType(), attrs);
     copyPrivateFuncs.push_back(mlir::SymbolRefAttr::get(funcOp));
   };
 
diff --git a/flang/test/Integration/OpenMP/copyprivate.f90 b/flang/test/Integration/OpenMP/copyprivate.f90
index eb8b46e64e5b7d..9318b743a95290 100644
--- a/flang/test/Integration/OpenMP/copyprivate.f90
+++ b/flang/test/Integration/OpenMP/copyprivate.f90
@@ -8,8 +8,10 @@
 
 !RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
 
+!CHECK-DAG: define void @_copy_box_Uxi32(ptr %{{.*}}, ptr %{{.*}})
 !CHECK-DAG: define void @_copy_10xi32(ptr %{{.*}}, ptr %{{.*}})
 !CHECK-DAG: define void @_copy_i64(ptr %{{.*}}, ptr %{{.*}})
+!CHECK-DAG: define void @_copy_box_Uxi64(ptr %{{.*}}, ptr %{{.*}})
 !CHECK-DAG: define void @_copy_f32(ptr %{{.*}}, ptr %{{.*}})
 !CHECK-DAG: define void @_copy_2x3xf32(ptr %{{.*}}, ptr %{{.*}})
 !CHECK-DAG: define void @_copy_z32(ptr %{{.*}}, ptr %{{.*}})
@@ -69,9 +71,10 @@ subroutine test_scalar()
   !$omp end parallel
 end subroutine
 
-subroutine test_types()
+subroutine test_types(a, n)
+  integer :: a(:), n
   integer(4) :: i4, i4a(10)
-  integer(8) :: i8
+  integer(8) :: i8, i8a(n)
   real :: r, ra(2, 3)
   complex :: z, za(10)
   logical :: l, la(5)
@@ -87,8 +90,8 @@ subroutine test_types()
   integer, allocatable :: aloc(:)
   character(kind=1, len=9), pointer :: ptr(:)
 
-  !$omp parallel private(i4, i4a, i8, r, ra, z, za, l, la, c1, c1a, c2, t, aloc, ptr)
+  !$omp parallel private(a, i4, i4a, i8, i8a, r, ra, z, za, l, la, c1, c1a, c2, t, aloc, ptr)
   !$omp single
-  !$omp end single copyprivate(i4, i4a, i8, r, ra, z, za, l, la, c1, c1a, c2, t, aloc, ptr)
+  !$omp end single copyprivate(a, i4, i4a, i8, i8a, r, ra, z, za, l, la, c1, c1a, c2, t, aloc, ptr)
   !$omp end parallel
 end subroutine
diff --git a/flang/test/Lower/OpenMP/copyprivate.f90 b/flang/test/Lower/OpenMP/copyprivate.f90
index 4c90b8c2108fc8..9b76a996ef3e16 100644
--- a/flang/test/Lower/OpenMP/copyprivate.f90
+++ b/flang/test/Lower/OpenMP/copyprivate.f90
@@ -12,6 +12,7 @@
 !CHECK-DAG: func private @_copy_c8x8(%{{.*}}: !fir.ref<!fir.char<1,8>>, %{{.*}}: !fir.ref<!fir.char<1,8>>)
 !CHECK-DAG: func private @_copy_c16x8(%{{.*}}: !fir.ref<!fir.char<2,8>>, %{{.*}}: !fir.ref<!fir.char<2,8>>)
 
+!CHECK-DAG: func private @_copy_box_Uxi32(%{{.*}}: !fir.ref<!fir.box<!fir.array<?xi32>>>, %{{.*}}: !fir.ref<!fir.box<!fir.array<?xi32>>>)
 !CHECK-DAG: func private @_copy_10xi32(%{{.*}}: !fir.ref<!fir.array<10xi32>>, %{{.*}}: !fir.ref<!fir.array<10xi32>>)
 !CHECK-DAG: func private @_copy_3x4xi32(%{{.*}}: !fir.ref<!fir.array<3x4xi32>>, %{{.*}}: !fir.ref<!fir.array<3x4xi32>>)
 !CHECK-DAG: func private @_copy_10xf32(%{{.*}}: !fir.ref<!fir.array<10xf32>>, %{{.*}}: !fir.ref<!fir.array<10xf32>>)
@@ -93,25 +94,32 @@ subroutine test_scalar()
 
 !CHECK-LABEL: func @_QPtest_array
 !CHECK:         omp.parallel
+!CHECK:           %[[A:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
 !CHECK:           %[[I1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi1"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
 !CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi2"} : (!fir.ref<!fir.array<3x4xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4xi32>>, !fir.ref<!fir.array<3x4xi32>>)
+!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi3"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
 !CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEr1"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
 !CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEc1"} : (!fir.ref<!fir.array<3x4x!fir.complex<4>>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4x!fir.complex<4>>>, !fir.ref<!fir.array<3x4x!fir.complex<4>>>)
 !CHECK:           %[[L1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEl1"} : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.ref<!fir.array<10x!fir.logical<4>>>)
 !CHECK:           %[[S1:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_arrayEs1"} : (!fir.ref<!fir.array<3x!fir.char<1,8>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<1,8>>>, !fir.ref<!fir.array<3x!fir.char<1,8>>>)
 !CHECK:           %[[S2:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_arrayEs2"} : (!fir.ref<!fir.array<3x!fir.char<2,5>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<2,5>>>, !fir.ref<!fir.array<3x!fir.char<2,5>>>)
-!CHECK:           omp.single copyprivate(%[[I1]]#0 -> @_copy_10xi32 : !fir.ref<!fir.array<10xi32>>, %[[I2]]#0 -> @_copy_3x4xi32 : !fir.ref<!fir.array<3x4xi32>>,  %[[R1]]#0 -> @_copy_10xf32 : !fir.ref<!fir.array<10xf32>>, %[[C1]]#0 -> @_copy_3x4xz32 : !fir.ref<!fir.array<3x4x!fir.complex<4>>>, %[[L1]]#0 -> @_copy_10xl32 : !fir.ref<!fir.array<10x!fir.logical<4>>>, %[[S1]]#0 -> @_copy_3xc8x8 : !fir.ref<!fir.array<3x!fir.char<1,8>>>, %[[S2]]#0 -> @_copy_3xc16x5 : !fir.ref<!fir.array<3x!fir.char<2,5>>>)
-subroutine test_array()
-  integer :: i1(10), i2(3, 4)
+!CHECK:           %[[A_REF:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
+!CHECK:           fir.store %[[A]]#0 to %[[A_REF]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+!CHECK:           %[[I3_REF:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
+!CHECK:           fir.store %[[I3]]#0 to %[[I3_REF]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+!CHECK:           omp.single copyprivate(%[[A_REF]] -> @_copy_box_Uxi32 : !fir.ref<!fir.box<!fir.array<?xi32>>>, %[[I1]]#0 -> @_copy_10xi32 : !fir.ref<!fir.array<10xi32>>, %[[I2]]#0 -> @_copy_3x4xi32 : !fir.ref<!fir.array<3x4xi32>>, %[[I3_REF]] -> @_copy_box_Uxi32 : !fir.ref<!fir.box<!fir.array<?xi32>>>, %[[R1]]#0 -> @_copy_10xf32 : !fir.ref<!fir.array<10xf32>>, %[[C1]]#0 -> @_copy_3x4xz32 : !fir.ref<!fir.array<3x4x!fir.complex<4>>>, %[[L1]]#0 -> @_copy_10xl32 : !fir.ref<!fir.array<10x!fir.logical<4>>>, %[[S1]]#0 -> @_copy_3xc8x8 : !fir.ref<!fir.array<3x!fir.char<1,8>>>, %[[S2]]#0 -> @_copy_3xc16x5 : !fir.ref<!fir.array<3x!fir.char<2,5>>>)
+subroutine test_array(a, n)
+  integer :: a(:), n
+  integer :: i1(10), i2(3, 4), i3(n)
   real :: r1(10)
   complex :: c1(3, 4)
   logical :: l1(10)
   character(8) :: s1(3)
   character(kind=2, len=5) :: s2(3)
 
-  !$omp parallel private(i1, i2, r1, c1, l1, s1, s2)
+  !$omp parallel private(a, i1, i2, i3, r1, c1, l1, s1, s2)
   !$omp single
-  !$omp end single copyprivate(i1, i2, r1, c1, l1, s1, s2)
+  !$omp end single copyprivate(a, i1, i2, i3, r1, c1, l1, s1, s2)
   !$omp end parallel
 end subroutine
 



More information about the flang-commits mailing list