[flang-commits] [flang] [llvm] [flang][OpenMP] Decompose compound constructs, do recursive lowering (PR #90098)

Krzysztof Parzyszek via flang-commits flang-commits at lists.llvm.org
Tue May 7 08:26:05 PDT 2024


https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/90098

>From 4cf41a60afacefbbc7098309ba1f099b867a8806 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 26 Mar 2024 09:58:18 -0500
Subject: [PATCH 01/15] [flang][OpenMP] Implement getOpenMPVersion helper
 function, NFC

---
 flang/lib/Lower/OpenMP/Utils.cpp | 6 ++++++
 flang/lib/Lower/OpenMP/Utils.h   | 1 +
 2 files changed, 7 insertions(+)

diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index da3f2be73e5095..1400f1f4cc8dd2 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -47,6 +47,12 @@ int64_t getCollapseValue(const List<Clause> &clauses) {
   return 1;
 }
 
+uint32_t getOpenMPVersion(mlir::ModuleOp mod) {
+  if (mlir::Attribute verAttr = mod->getAttr("omp.version"))
+    return llvm::cast<mlir::omp::VersionAttr>(verAttr).getVersion();
+  llvm_unreachable("Expecting OpenMP version attribute in module");
+}
+
 void genObjectList(const ObjectList &objects,
                    Fortran::lower::AbstractConverter &converter,
                    llvm::SmallVectorImpl<mlir::Value> &operands) {
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index b3a9f7f30c98bd..693a91f7cc6954 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -59,6 +59,7 @@ void gatherFuncAndVarSyms(
     llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause);
 
 int64_t getCollapseValue(const List<Clause> &clauses);
+uint32_t getOpenMPVersion(mlir::ModuleOp mod);
 
 Fortran::semantics::Symbol *
 getOmpObjectSymbol(const Fortran::parser::OmpObject &ompObject);

>From 45d5017db069a8804fbdb75da223b68cbaab11f2 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 26 Mar 2024 10:14:13 -0500
Subject: [PATCH 02/15] [flang][OpenMP] Implement getIterationVariableSymbol
 helper function, NFC

---
 flang/lib/Lower/OpenMP/Utils.cpp | 24 +++++++++++++++++++++++-
 flang/lib/Lower/OpenMP/Utils.h   |  6 ++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 1400f1f4cc8dd2..c38e0c18cac88c 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -11,10 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "Utils.h"
-#include "Clauses.h"
 
+#include "Clauses.h"
 #include <flang/Lower/AbstractConverter.h>
 #include <flang/Lower/ConvertType.h>
+#include <flang/Lower/PFTBuilder.h>
 #include <flang/Optimizer/Builder/FIRBuilder.h>
 #include <flang/Parser/parse-tree.h>
 #include <flang/Parser/tools.h>
@@ -87,6 +88,27 @@ mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter,
   return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize);
 }
 
+Fortran::semantics::Symbol *
+getIterationVariableSymbol(const Fortran::lower::pft::Evaluation &eval) {
+  return eval.visit(Fortran::common::visitors{
+      [&](const Fortran::parser::DoConstruct &doLoop) {
+        if (const auto &maybeCtrl = doLoop.GetLoopControl()) {
+          using LoopControl = Fortran::parser::LoopControl;
+          if (auto *bounds = std::get_if<LoopControl::Bounds>(&maybeCtrl->u)) {
+            static_assert(
+                std::is_same_v<decltype(bounds->name),
+                               Fortran::parser::Scalar<Fortran::parser::Name>>);
+            return bounds->name.thing.symbol;
+          }
+        }
+        return static_cast<Fortran::semantics::Symbol *>(nullptr);
+      },
+      [](auto &&) {
+        return static_cast<Fortran::semantics::Symbol *>(nullptr);
+      },
+  });
+}
+
 void gatherFuncAndVarSyms(
     const ObjectList &objects, mlir::omp::DeclareTargetCaptureClause clause,
     llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause) {
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 693a91f7cc6954..5e0ebba23bf358 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -34,6 +34,9 @@ struct OmpObjectList;
 } // namespace parser
 
 namespace lower {
+namespace pft {
+struct Evaluation;
+}
 
 class AbstractConverter;
 
@@ -54,6 +57,9 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc,
 mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter,
                           std::size_t loopVarTypeSize);
 
+Fortran::semantics::Symbol *
+getIterationVariableSymbol(const Fortran::lower::pft::Evaluation &eval);
+
 void gatherFuncAndVarSyms(
     const ObjectList &objects, mlir::omp::DeclareTargetCaptureClause clause,
     llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause);

>From 8f1ce585ea3b32551563c2949630ad4118d511e6 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Wed, 24 Apr 2024 09:51:49 -0500
Subject: [PATCH 03/15] [flang][OpenMP] Pass symTable to all genXYZ functions,
 NFC

This will unify the interface a bit more.
---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 171 ++++++++++++++++++------------
 1 file changed, 105 insertions(+), 66 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index f454f5a45a5150..47935e6cf8efcf 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -548,11 +548,12 @@ struct OpWithBodyGenInfo {
           mlir::Operation *)>;
 
   OpWithBodyGenInfo(Fortran::lower::AbstractConverter &converter,
+                    Fortran::lower::SymMap &symTable,
                     Fortran::semantics::SemanticsContext &semaCtx,
                     mlir::Location loc, Fortran::lower::pft::Evaluation &eval,
                     llvm::omp::Directive dir)
-      : converter(converter), semaCtx(semaCtx), loc(loc), eval(eval), dir(dir) {
-  }
+      : converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc),
+        eval(eval), dir(dir) {}
 
   OpWithBodyGenInfo &setGenNested(bool value) {
     genNested = value;
@@ -589,6 +590,8 @@ struct OpWithBodyGenInfo {
 
   /// [inout] converter to use for the clauses.
   Fortran::lower::AbstractConverter &converter;
+  /// [in] Symbol table
+  Fortran::lower::SymMap &symTable;
   /// [in] Semantics context
   Fortran::semantics::SemanticsContext &semaCtx;
   /// [in] location in source code.
@@ -764,6 +767,7 @@ static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) {
 
 static void genBodyOfTargetDataOp(
     Fortran::lower::AbstractConverter &converter,
+    Fortran::lower::SymMap &symTable,
     Fortran::semantics::SemanticsContext &semaCtx,
     Fortran::lower::pft::Evaluation &eval, bool genNested,
     mlir::omp::TargetDataOp &dataOp, llvm::ArrayRef<mlir::Type> useDeviceTypes,
@@ -830,6 +834,7 @@ static void genBodyOfTargetDataOp(
 // all the symbols present in mapSymbols as block arguments to this block.
 static void
 genBodyOfTargetOp(Fortran::lower::AbstractConverter &converter,
+                  Fortran::lower::SymMap &symTable,
                   Fortran::semantics::SemanticsContext &semaCtx,
                   Fortran::lower::pft::Evaluation &eval, bool genNested,
                   mlir::omp::TargetOp &targetOp,
@@ -1267,6 +1272,7 @@ static void genWsloopClauses(
 
 static mlir::omp::BarrierOp
 genBarrierOp(Fortran::lower::AbstractConverter &converter,
+             Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
              Fortran::lower::pft::Evaluation &eval, mlir::Location loc) {
   return converter.getFirOpBuilder().create<mlir::omp::BarrierOp>(loc);
@@ -1274,6 +1280,7 @@ genBarrierOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::CriticalOp
 genCriticalOp(Fortran::lower::AbstractConverter &converter,
+              Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, bool genNested,
               mlir::Location loc, const List<Clause> &clauses,
@@ -1298,7 +1305,7 @@ genCriticalOp(Fortran::lower::AbstractConverter &converter,
   }
 
   return genOpWithBody<mlir::omp::CriticalOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_critical)
           .setGenNested(genNested),
       nameAttr);
@@ -1306,6 +1313,7 @@ genCriticalOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::DistributeOp
 genDistributeOp(Fortran::lower::AbstractConverter &converter,
+                Fortran::lower::SymMap &symTable,
                 Fortran::semantics::SemanticsContext &semaCtx,
                 Fortran::lower::pft::Evaluation &eval, bool genNested,
                 mlir::Location loc, const List<Clause> &clauses) {
@@ -1315,6 +1323,7 @@ genDistributeOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::FlushOp
 genFlushOp(Fortran::lower::AbstractConverter &converter,
+           Fortran::lower::SymMap &symTable,
            Fortran::semantics::SemanticsContext &semaCtx,
            Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
            const ObjectList &objects, const List<Clause> &clauses) {
@@ -1327,17 +1336,19 @@ genFlushOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::MasterOp
 genMasterOp(Fortran::lower::AbstractConverter &converter,
+            Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
             Fortran::lower::pft::Evaluation &eval, bool genNested,
             mlir::Location loc) {
   return genOpWithBody<mlir::omp::MasterOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_master)
           .setGenNested(genNested));
 }
 
 static mlir::omp::OrderedOp
 genOrderedOp(Fortran::lower::AbstractConverter &converter,
+             Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
              Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
              const List<Clause> &clauses) {
@@ -1347,6 +1358,7 @@ genOrderedOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::OrderedRegionOp
 genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
+                   Fortran::lower::SymMap &symTable,
                    Fortran::semantics::SemanticsContext &semaCtx,
                    Fortran::lower::pft::Evaluation &eval, bool genNested,
                    mlir::Location loc, const List<Clause> &clauses) {
@@ -1354,7 +1366,7 @@ genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
   genOrderedRegionClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::OrderedRegionOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_ordered)
           .setGenNested(genNested),
       clauseOps);
@@ -1383,7 +1395,7 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
   };
 
   OpWithBodyGenInfo genInfo =
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_parallel)
           .setGenNested(genNested)
           .setOuterCombined(outerCombined)
@@ -1440,13 +1452,14 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::SectionOp
 genSectionOp(Fortran::lower::AbstractConverter &converter,
+             Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
              Fortran::lower::pft::Evaluation &eval, bool genNested,
              mlir::Location loc, const List<Clause> &clauses) {
   // Currently only private/firstprivate clause is handled, and
   // all privatization is done within `omp.section` operations.
   return genOpWithBody<mlir::omp::SectionOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_section)
           .setGenNested(genNested)
           .setClauses(&clauses));
@@ -1454,11 +1467,12 @@ genSectionOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::SectionsOp
 genSectionsOp(Fortran::lower::AbstractConverter &converter,
+              Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
               const mlir::omp::SectionsClauseOps &clauseOps) {
   return genOpWithBody<mlir::omp::SectionsOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_sections)
           .setGenNested(false),
       clauseOps);
@@ -1466,6 +1480,7 @@ genSectionsOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::SimdOp
 genSimdOp(Fortran::lower::AbstractConverter &converter,
+          Fortran::lower::SymMap &symTable,
           Fortran::semantics::SemanticsContext &semaCtx,
           Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
           const List<Clause> &clauses) {
@@ -1499,8 +1514,8 @@ genSimdOp(Fortran::lower::AbstractConverter &converter,
   };
 
   createBodyOfOp(*loopOp,
-                 OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval,
-                                   llvm::omp::Directive::OMPD_simd)
+                 OpWithBodyGenInfo(converter, symTable, semaCtx, loc,
+                                   *nestedEval, llvm::omp::Directive::OMPD_simd)
                      .setClauses(&clauses)
                      .setDataSharingProcessor(&dsp)
                      .setGenRegionEntryCb(ivCallback));
@@ -1510,6 +1525,7 @@ genSimdOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::SingleOp
 genSingleOp(Fortran::lower::AbstractConverter &converter,
+            Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
             Fortran::lower::pft::Evaluation &eval, bool genNested,
             mlir::Location loc, const List<Clause> &clauses) {
@@ -1517,7 +1533,7 @@ genSingleOp(Fortran::lower::AbstractConverter &converter,
   genSingleClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::SingleOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_single)
           .setGenNested(genNested)
           .setClauses(&clauses),
@@ -1526,6 +1542,7 @@ genSingleOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::TargetOp
 genTargetOp(Fortran::lower::AbstractConverter &converter,
+            Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
             Fortran::lower::pft::Evaluation &eval, bool genNested,
             mlir::Location loc, const List<Clause> &clauses,
@@ -1633,13 +1650,14 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
   Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap);
 
   auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
-  genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSyms,
-                    mapLocs, mapTypes, loc);
+  genBodyOfTargetOp(converter, symTable, semaCtx, eval, genNested, targetOp,
+                    mapSyms, mapLocs, mapTypes, loc);
   return targetOp;
 }
 
 static mlir::omp::TargetDataOp
 genTargetDataOp(Fortran::lower::AbstractConverter &converter,
+                Fortran::lower::SymMap &symTable,
                 Fortran::semantics::SemanticsContext &semaCtx,
                 Fortran::lower::pft::Evaluation &eval, bool genNested,
                 mlir::Location loc, const List<Clause> &clauses) {
@@ -1654,14 +1672,16 @@ genTargetDataOp(Fortran::lower::AbstractConverter &converter,
   auto targetDataOp =
       converter.getFirOpBuilder().create<mlir::omp::TargetDataOp>(loc,
                                                                   clauseOps);
-  genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, targetDataOp,
-                        useDeviceTypes, useDeviceLocs, useDeviceSyms, loc);
+  genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, genNested,
+                        targetDataOp, useDeviceTypes, useDeviceLocs,
+                        useDeviceSyms, loc);
   return targetDataOp;
 }
 
 template <typename OpTy>
 static OpTy
 genTargetEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter,
+                               Fortran::lower::SymMap &symTable,
                                Fortran::semantics::SemanticsContext &semaCtx,
                                mlir::Location loc,
                                const List<Clause> &clauses) {
@@ -1689,6 +1709,7 @@ genTargetEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::TaskOp
 genTaskOp(Fortran::lower::AbstractConverter &converter,
+          Fortran::lower::SymMap &symTable,
           Fortran::semantics::SemanticsContext &semaCtx,
           Fortran::lower::pft::Evaluation &eval, bool genNested,
           mlir::Location loc, const List<Clause> &clauses) {
@@ -1697,7 +1718,7 @@ genTaskOp(Fortran::lower::AbstractConverter &converter,
   genTaskClauses(converter, semaCtx, stmtCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::TaskOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_task)
           .setGenNested(genNested)
           .setClauses(&clauses),
@@ -1706,6 +1727,7 @@ genTaskOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::TaskgroupOp
 genTaskgroupOp(Fortran::lower::AbstractConverter &converter,
+               Fortran::lower::SymMap &symTable,
                Fortran::semantics::SemanticsContext &semaCtx,
                Fortran::lower::pft::Evaluation &eval, bool genNested,
                mlir::Location loc, const List<Clause> &clauses) {
@@ -1713,7 +1735,7 @@ genTaskgroupOp(Fortran::lower::AbstractConverter &converter,
   genTaskgroupClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::TaskgroupOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_taskgroup)
           .setGenNested(genNested)
           .setClauses(&clauses),
@@ -1722,6 +1744,7 @@ genTaskgroupOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::TaskloopOp
 genTaskloopOp(Fortran::lower::AbstractConverter &converter,
+              Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
               const List<Clause> &clauses) {
@@ -1730,6 +1753,7 @@ genTaskloopOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::TaskwaitOp
 genTaskwaitOp(Fortran::lower::AbstractConverter &converter,
+              Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
               const List<Clause> &clauses) {
@@ -1741,6 +1765,7 @@ genTaskwaitOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::TaskyieldOp
 genTaskyieldOp(Fortran::lower::AbstractConverter &converter,
+               Fortran::lower::SymMap &symTable,
                Fortran::semantics::SemanticsContext &semaCtx,
                Fortran::lower::pft::Evaluation &eval, mlir::Location loc) {
   return converter.getFirOpBuilder().create<mlir::omp::TaskyieldOp>(loc);
@@ -1748,6 +1773,7 @@ genTaskyieldOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::TeamsOp
 genTeamsOp(Fortran::lower::AbstractConverter &converter,
+           Fortran::lower::SymMap &symTable,
            Fortran::semantics::SemanticsContext &semaCtx,
            Fortran::lower::pft::Evaluation &eval, bool genNested,
            mlir::Location loc, const List<Clause> &clauses,
@@ -1757,7 +1783,7 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter,
   genTeamsClauses(converter, semaCtx, stmtCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::TeamsOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_teams)
           .setGenNested(genNested)
           .setOuterCombined(outerCombined)
@@ -1767,6 +1793,7 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::WsloopOp
 genWsloopOp(Fortran::lower::AbstractConverter &converter,
+            Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
             Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
             const List<Clause> &clauses) {
@@ -1805,8 +1832,8 @@ genWsloopOp(Fortran::lower::AbstractConverter &converter,
   };
 
   createBodyOfOp(*loopOp,
-                 OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval,
-                                   llvm::omp::Directive::OMPD_do)
+                 OpWithBodyGenInfo(converter, symTable, semaCtx, loc,
+                                   *nestedEval, llvm::omp::Directive::OMPD_do)
                      .setClauses(&clauses)
                      .setDataSharingProcessor(&dsp)
                      .setReductions(&reductionSyms, &reductionTypes)
@@ -1820,6 +1847,7 @@ genWsloopOp(Fortran::lower::AbstractConverter &converter,
 
 static void
 genCompositeDistributeParallelDo(Fortran::lower::AbstractConverter &converter,
+                                 Fortran::lower::SymMap &symTable,
                                  Fortran::semantics::SemanticsContext &semaCtx,
                                  Fortran::lower::pft::Evaluation &eval,
                                  const List<Clause> &clauses,
@@ -1829,6 +1857,7 @@ genCompositeDistributeParallelDo(Fortran::lower::AbstractConverter &converter,
 
 static void genCompositeDistributeParallelDoSimd(
     Fortran::lower::AbstractConverter &converter,
+    Fortran::lower::SymMap &symTable,
     Fortran::semantics::SemanticsContext &semaCtx,
     Fortran::lower::pft::Evaluation &eval, const List<Clause> &clauses,
     mlir::Location loc) {
@@ -1837,6 +1866,7 @@ static void genCompositeDistributeParallelDoSimd(
 
 static void
 genCompositeDistributeSimd(Fortran::lower::AbstractConverter &converter,
+                           Fortran::lower::SymMap &symTable,
                            Fortran::semantics::SemanticsContext &semaCtx,
                            Fortran::lower::pft::Evaluation &eval,
                            const List<Clause> &clauses, mlir::Location loc) {
@@ -1844,6 +1874,7 @@ genCompositeDistributeSimd(Fortran::lower::AbstractConverter &converter,
 }
 
 static void genCompositeDoSimd(Fortran::lower::AbstractConverter &converter,
+                               Fortran::lower::SymMap &symTable,
                                Fortran::semantics::SemanticsContext &semaCtx,
                                Fortran::lower::pft::Evaluation &eval,
                                const List<Clause> &clauses,
@@ -1860,11 +1891,12 @@ static void genCompositeDoSimd(Fortran::lower::AbstractConverter &converter,
   // When support for vectorization is enabled, then we need to add handling of
   // if clause. Currently if clause can be skipped because we always assume
   // SIMD length = 1.
-  genWsloopOp(converter, semaCtx, eval, loc, clauses);
+  genWsloopOp(converter, symTable, semaCtx, eval, loc, clauses);
 }
 
 static void
 genCompositeTaskloopSimd(Fortran::lower::AbstractConverter &converter,
+                         Fortran::lower::SymMap &symTable,
                          Fortran::semantics::SemanticsContext &semaCtx,
                          Fortran::lower::pft::Evaluation &eval,
                          const List<Clause> &clauses, mlir::Location loc) {
@@ -1985,32 +2017,32 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
   default:
     break;
   case llvm::omp::Directive::OMPD_barrier:
-    genBarrierOp(converter, semaCtx, eval, currentLocation);
+    genBarrierOp(converter, symTable, semaCtx, eval, currentLocation);
     break;
   case llvm::omp::Directive::OMPD_taskwait:
-    genTaskwaitOp(converter, semaCtx, eval, currentLocation, clauses);
+    genTaskwaitOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
     break;
   case llvm::omp::Directive::OMPD_taskyield:
-    genTaskyieldOp(converter, semaCtx, eval, currentLocation);
+    genTaskyieldOp(converter, symTable, semaCtx, eval, currentLocation);
     break;
   case llvm::omp::Directive::OMPD_target_data:
-    genTargetDataOp(converter, semaCtx, eval, /*genNested=*/true,
+    genTargetDataOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
                     currentLocation, clauses);
     break;
   case llvm::omp::Directive::OMPD_target_enter_data:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>(
-        converter, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses);
     break;
   case llvm::omp::Directive::OMPD_target_exit_data:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>(
-        converter, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses);
     break;
   case llvm::omp::Directive::OMPD_target_update:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>(
-        converter, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses);
     break;
   case llvm::omp::Directive::OMPD_ordered:
-    genOrderedOp(converter, semaCtx, eval, currentLocation, clauses);
+    genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
     break;
   }
 }
@@ -2034,7 +2066,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
                             [&](auto &&s) { return makeClause(s.v, semaCtx); })
                  : List<Clause>{};
   mlir::Location currentLocation = converter.genLocation(verbatim.source);
-  genFlushOp(converter, semaCtx, eval, currentLocation, objects, clauses);
+  genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects,
+             clauses);
 }
 
 static void
@@ -2187,12 +2220,13 @@ genOMP(Fortran::lower::AbstractConverter &converter,
     switch (leafDir) {
     case llvm::omp::Directive::OMPD_master:
       // 2.16 MASTER construct.
-      genMasterOp(converter, semaCtx, eval, genNested, currentLocation);
+      genMasterOp(converter, symTable, semaCtx, eval, genNested,
+                  currentLocation);
       break;
     case llvm::omp::Directive::OMPD_ordered:
       // 2.17.9 ORDERED construct.
-      genOrderedRegionOp(converter, semaCtx, eval, genNested, currentLocation,
-                         clauses);
+      genOrderedRegionOp(converter, symTable, semaCtx, eval, genNested,
+                         currentLocation, clauses);
       break;
     case llvm::omp::Directive::OMPD_parallel:
       // 2.6 PARALLEL construct.
@@ -2201,41 +2235,43 @@ genOMP(Fortran::lower::AbstractConverter &converter,
       break;
     case llvm::omp::Directive::OMPD_single:
       // 2.8.2 SINGLE construct.
-      genSingleOp(converter, semaCtx, eval, genNested, currentLocation,
-                  clauses);
+      genSingleOp(converter, symTable, semaCtx, eval, genNested,
+                  currentLocation, clauses);
       break;
     case llvm::omp::Directive::OMPD_target:
       // 2.12.5 TARGET construct.
-      genTargetOp(converter, semaCtx, eval, genNested, currentLocation, clauses,
-                  outerCombined);
+      genTargetOp(converter, symTable, semaCtx, eval, genNested,
+                  currentLocation, clauses, outerCombined);
       break;
     case llvm::omp::Directive::OMPD_target_data:
       // 2.12.2 TARGET DATA construct.
-      genTargetDataOp(converter, semaCtx, eval, genNested, currentLocation,
-                      clauses);
+      genTargetDataOp(converter, symTable, semaCtx, eval, genNested,
+                      currentLocation, clauses);
       break;
     case llvm::omp::Directive::OMPD_task:
       // 2.10.1 TASK construct.
-      genTaskOp(converter, semaCtx, eval, genNested, currentLocation, clauses);
+      genTaskOp(converter, symTable, semaCtx, eval, genNested, currentLocation,
+                clauses);
       break;
     case llvm::omp::Directive::OMPD_taskgroup:
       // 2.17.6 TASKGROUP construct.
-      genTaskgroupOp(converter, semaCtx, eval, genNested, currentLocation,
-                     clauses);
+      genTaskgroupOp(converter, symTable, semaCtx, eval, genNested,
+                     currentLocation, clauses);
       break;
     case llvm::omp::Directive::OMPD_teams:
       // 2.7 TEAMS construct.
       // FIXME Pass the outerCombined argument or rename it to better describe
       // what it represents if it must always be `false` in this context.
-      genTeamsOp(converter, semaCtx, eval, genNested, currentLocation, clauses);
+      genTeamsOp(converter, symTable, semaCtx, eval, genNested, currentLocation,
+                 clauses);
       break;
     case llvm::omp::Directive::OMPD_workshare:
       // 2.8.3 WORKSHARE construct.
       // FIXME: Workshare is not a commonly used OpenMP construct, an
       // implementation for this feature will come later. For the codes
       // that use this construct, add a single construct for now.
-      genSingleOp(converter, semaCtx, eval, genNested, currentLocation,
-                  clauses);
+      genSingleOp(converter, symTable, semaCtx, eval, genNested,
+                  currentLocation, clauses);
       break;
     default:
       llvm_unreachable("Unexpected block construct");
@@ -2257,8 +2293,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
       makeClauses(std::get<Fortran::parser::OmpClauseList>(cd.t), semaCtx);
   const auto &name = std::get<std::optional<Fortran::parser::Name>>(cd.t);
   mlir::Location currentLocation = converter.getCurrentLocation();
-  genCriticalOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
-                clauses, name);
+  genCriticalOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
+                currentLocation, clauses, name);
 }
 
 static void
@@ -2304,26 +2340,27 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
       switch (leafDir) {
       case llvm::omp::Directive::OMPD_distribute_parallel_do:
         // 2.9.4.3 DISTRIBUTE PARALLEL Worksharing-Loop construct.
-        genCompositeDistributeParallelDo(converter, semaCtx, eval, clauses,
-                                         currentLocation);
+        genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval,
+                                         clauses, currentLocation);
         break;
       case llvm::omp::Directive::OMPD_distribute_parallel_do_simd:
         // 2.9.4.4 DISTRIBUTE PARALLEL Worksharing-Loop SIMD construct.
-        genCompositeDistributeParallelDoSimd(converter, semaCtx, eval, clauses,
-                                             currentLocation);
+        genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval,
+                                             clauses, currentLocation);
         break;
       case llvm::omp::Directive::OMPD_distribute_simd:
         // 2.9.4.2 DISTRIBUTE SIMD construct.
-        genCompositeDistributeSimd(converter, semaCtx, eval, clauses,
+        genCompositeDistributeSimd(converter, symTable, semaCtx, eval, clauses,
                                    currentLocation);
         break;
       case llvm::omp::Directive::OMPD_do_simd:
         // 2.9.3.2 Worksharing-Loop SIMD construct.
-        genCompositeDoSimd(converter, semaCtx, eval, clauses, currentLocation);
+        genCompositeDoSimd(converter, symTable, semaCtx, eval, clauses,
+                           currentLocation);
         break;
       case llvm::omp::Directive::OMPD_taskloop_simd:
         // 2.10.3 TASKLOOP SIMD construct.
-        genCompositeTaskloopSimd(converter, semaCtx, eval, clauses,
+        genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, clauses,
                                  currentLocation);
         break;
       default:
@@ -2334,12 +2371,13 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
       switch (leafDir) {
       case llvm::omp::Directive::OMPD_distribute:
         // 2.9.4.1 DISTRIBUTE construct.
-        genDistributeOp(converter, semaCtx, eval, genNested, currentLocation,
-                        clauses);
+        genDistributeOp(converter, symTable, semaCtx, eval, genNested,
+                        currentLocation, clauses);
         break;
       case llvm::omp::Directive::OMPD_do:
         // 2.9.2 Worksharing-Loop construct.
-        genWsloopOp(converter, semaCtx, eval, currentLocation, clauses);
+        genWsloopOp(converter, symTable, semaCtx, eval, currentLocation,
+                    clauses);
         break;
       case llvm::omp::Directive::OMPD_parallel:
         // 2.6 PARALLEL construct.
@@ -2353,16 +2391,17 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
         break;
       case llvm::omp::Directive::OMPD_simd:
         // 2.9.3.1 SIMD construct.
-        genSimdOp(converter, semaCtx, eval, currentLocation, clauses);
+        genSimdOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
         break;
       case llvm::omp::Directive::OMPD_target:
         // 2.12.5 TARGET construct.
-        genTargetOp(converter, semaCtx, eval, genNested, currentLocation,
-                    clauses, /*outerCombined=*/true);
+        genTargetOp(converter, symTable, semaCtx, eval, genNested,
+                    currentLocation, clauses, /*outerCombined=*/true);
         break;
       case llvm::omp::Directive::OMPD_taskloop:
         // 2.10.2 TASKLOOP construct.
-        genTaskloopOp(converter, semaCtx, eval, currentLocation, clauses);
+        genTaskloopOp(converter, symTable, semaCtx, eval, currentLocation,
+                      clauses);
         break;
       case llvm::omp::Directive::OMPD_teams:
         // 2.7 TEAMS construct.
@@ -2370,8 +2409,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
         // combined construct in this constext (e.g. target teams distribute).
         // Maybe rename the argument if it represents something else or
         // initialize it properly.
-        genTeamsOp(converter, semaCtx, eval, genNested, currentLocation,
-                   clauses, /*outerCombined=*/true);
+        genTeamsOp(converter, symTable, semaCtx, eval, genNested,
+                   currentLocation, clauses, /*outerCombined=*/true);
         break;
       case llvm::omp::Directive::OMPD_loop:
       case llvm::omp::Directive::OMPD_masked:
@@ -2433,7 +2472,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   }
 
   // SECTIONS construct.
-  genSectionsOp(converter, semaCtx, eval, currentLocation, clauseOps);
+  genSectionsOp(converter, symTable, semaCtx, eval, currentLocation, clauseOps);
 
   // Generate nested SECTION operations recursively.
   const auto &sectionBlocks =
@@ -2443,8 +2482,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   for (const auto &[nblock, neval] :
        llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) {
     symTable.pushScope();
-    genSectionOp(converter, semaCtx, neval, /*genNested=*/true, currentLocation,
-                 clauses);
+    genSectionOp(converter, symTable, semaCtx, neval, /*genNested=*/true,
+                 currentLocation, clauses);
     symTable.popScope();
     firOpBuilder.restoreInsertionPoint(ip);
   }

>From 6f7697e46ace92707bc4cf648fab25a72c0639a1 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Thu, 11 Apr 2024 10:35:02 -0500
Subject: [PATCH 04/15] [flang][OpenMP] Decompose compound construccts, do
 recursive lowering

A compound construct with a list of clauses is broken up into
individual leaf/composite constructs. Each such construct has
the list of clauses that apply to it based on the OpenMP spec.

Each lowering function (i.e. a function that generates MLIR ops)
is now responsible for generating its body as described below.

Functions that receive AST nodes extract the construct, and the
clauses from the node. They then create a work queue consisting
of individual constructs, and invoke a common dispatch function.

The dispatch function examines the current position in the queue,
and invokes the appropriate lowering function. Each lowering
function receives the queue as well, and once it needs to generate
its body, it either invokes the dispatch function on the rest of
the queue (if any), or processes nested evaluations if the work
queue is at the end.
---
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 784 +++++++-------
 .../Frontend/OpenMP/ConstructDecompositionT.h | 985 ++++++++++++++++++
 2 files changed, 1376 insertions(+), 393 deletions(-)
 create mode 100644 llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 47935e6cf8efcf..4b8afd42f639d5 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -36,6 +36,7 @@
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Frontend/OpenMP/ConstructDecompositionT.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 
 using namespace Fortran::lower::omp;
@@ -72,6 +73,89 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
     converter.genEval(e);
 }
 
+//===----------------------------------------------------------------------===//
+// Directive decomposition
+//===----------------------------------------------------------------------===//
+
+namespace {
+using DirectiveWithClauses = tomp::DirectiveWithClauses<lower::omp::Clause>;
+using ConstructQueue = List<DirectiveWithClauses>;
+} // namespace
+
+static void genOMPDispatch(Fortran::lower::AbstractConverter &converter,
+                           Fortran::lower::SymMap &symTable,
+                           Fortran::semantics::SemanticsContext &semaCtx,
+                           Fortran::lower::pft::Evaluation &eval,
+                           mlir::Location loc, const ConstructQueue &queue,
+                           ConstructQueue::iterator item);
+
+namespace {
+struct ConstructDecomposition {
+  ConstructDecomposition(mlir::ModuleOp modOp,
+                         semantics::SemanticsContext &semaCtx,
+                         lower::pft::Evaluation &ev,
+                         llvm::omp::Directive construct,
+                         const List<Clause> &clauses)
+      : semaCtx(semaCtx), mod(modOp), eval(ev) {
+    tomp::ConstructDecompositionT decompose(getOpenMPVersion(modOp), *this,
+                                            construct, llvm::ArrayRef(clauses));
+    output = std::move(decompose.output);
+  }
+
+  // Given an object, return its base object if one exists.
+  std::optional<Object> getBaseObject(const Object &object) {
+    return lower::omp::getBaseObject(object, semaCtx);
+  }
+
+  // Return the iteration variable of the associated loop if any.
+  std::optional<Object> getLoopIterVar() {
+    if (semantics::Symbol *symbol = getIterationVariableSymbol(eval))
+      return Object{symbol, /*designator=*/{}};
+    return std::nullopt;
+  }
+
+  semantics::SemanticsContext &semaCtx;
+  mlir::ModuleOp mod;
+  lower::pft::Evaluation &eval;
+  List<DirectiveWithClauses> output;
+};
+} // namespace
+
+LLVM_DUMP_METHOD static llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os, const DirectiveWithClauses &dwc) {
+  os << llvm::omp::getOpenMPDirectiveName(dwc.id);
+  for (auto [index, clause] : llvm::enumerate(dwc.clauses)) {
+    os << (index == 0 ? '\t' : ' ');
+    os << llvm::omp::getOpenMPClauseName(clause.id);
+  }
+  return os;
+}
+
+static void splitCompoundConstruct(
+    mlir::ModuleOp modOp, Fortran::semantics::SemanticsContext &semaCtx,
+    Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive construct,
+    const List<Clause> &clauses, List<DirectiveWithClauses> &directives) {
+
+  ConstructDecomposition decompose(modOp, semaCtx, eval, construct, clauses);
+  assert(!decompose.output.empty());
+
+  llvm::SmallVector<llvm::omp::Directive> loweringUnits;
+  std::ignore =
+      llvm::omp::getLeafOrCompositeConstructs(construct, loweringUnits);
+
+  int leafIndex = 0;
+  for (llvm::omp::Directive dir_id : loweringUnits) {
+    directives.push_back(DirectiveWithClauses{dir_id});
+    DirectiveWithClauses &dwc = directives.back();
+    llvm::ArrayRef<llvm::omp::Directive> leafsOrSelf =
+        llvm::omp::getLeafConstructsOrSelf(dir_id);
+    for (int i = 0, e = leafsOrSelf.size(); i != e; ++i) {
+      dwc.clauses.append(decompose.output[leafIndex].clauses);
+      ++leafIndex;
+    }
+  }
+}
+
 static fir::GlobalOp globalInitialization(
     Fortran::lower::AbstractConverter &converter,
     fir::FirOpBuilder &firOpBuilder, const Fortran::semantics::Symbol &sym,
@@ -460,81 +544,6 @@ markDeclareTarget(mlir::Operation *op,
   declareTargetOp.setDeclareTarget(deviceType, captureClause);
 }
 
-/// Split a combined directive into an outer leaf directive and the (possibly
-/// combined) rest of the combined directive. Composite directives and
-/// non-compound directives are not split, in which case it will return the
-/// input directive as its first output and an empty value as its second output.
-static std::pair<llvm::omp::Directive, std::optional<llvm::omp::Directive>>
-splitCombinedDirective(llvm::omp::Directive dir) {
-  using D = llvm::omp::Directive;
-  switch (dir) {
-  case D::OMPD_masked_taskloop:
-    return {D::OMPD_masked, D::OMPD_taskloop};
-  case D::OMPD_masked_taskloop_simd:
-    return {D::OMPD_masked, D::OMPD_taskloop_simd};
-  case D::OMPD_master_taskloop:
-    return {D::OMPD_master, D::OMPD_taskloop};
-  case D::OMPD_master_taskloop_simd:
-    return {D::OMPD_master, D::OMPD_taskloop_simd};
-  case D::OMPD_parallel_do:
-    return {D::OMPD_parallel, D::OMPD_do};
-  case D::OMPD_parallel_do_simd:
-    return {D::OMPD_parallel, D::OMPD_do_simd};
-  case D::OMPD_parallel_masked:
-    return {D::OMPD_parallel, D::OMPD_masked};
-  case D::OMPD_parallel_masked_taskloop:
-    return {D::OMPD_parallel, D::OMPD_masked_taskloop};
-  case D::OMPD_parallel_masked_taskloop_simd:
-    return {D::OMPD_parallel, D::OMPD_masked_taskloop_simd};
-  case D::OMPD_parallel_master:
-    return {D::OMPD_parallel, D::OMPD_master};
-  case D::OMPD_parallel_master_taskloop:
-    return {D::OMPD_parallel, D::OMPD_master_taskloop};
-  case D::OMPD_parallel_master_taskloop_simd:
-    return {D::OMPD_parallel, D::OMPD_master_taskloop_simd};
-  case D::OMPD_parallel_sections:
-    return {D::OMPD_parallel, D::OMPD_sections};
-  case D::OMPD_parallel_workshare:
-    return {D::OMPD_parallel, D::OMPD_workshare};
-  case D::OMPD_target_parallel:
-    return {D::OMPD_target, D::OMPD_parallel};
-  case D::OMPD_target_parallel_do:
-    return {D::OMPD_target, D::OMPD_parallel_do};
-  case D::OMPD_target_parallel_do_simd:
-    return {D::OMPD_target, D::OMPD_parallel_do_simd};
-  case D::OMPD_target_simd:
-    return {D::OMPD_target, D::OMPD_simd};
-  case D::OMPD_target_teams:
-    return {D::OMPD_target, D::OMPD_teams};
-  case D::OMPD_target_teams_distribute:
-    return {D::OMPD_target, D::OMPD_teams_distribute};
-  case D::OMPD_target_teams_distribute_parallel_do:
-    return {D::OMPD_target, D::OMPD_teams_distribute_parallel_do};
-  case D::OMPD_target_teams_distribute_parallel_do_simd:
-    return {D::OMPD_target, D::OMPD_teams_distribute_parallel_do_simd};
-  case D::OMPD_target_teams_distribute_simd:
-    return {D::OMPD_target, D::OMPD_teams_distribute_simd};
-  case D::OMPD_teams_distribute:
-    return {D::OMPD_teams, D::OMPD_distribute};
-  case D::OMPD_teams_distribute_parallel_do:
-    return {D::OMPD_teams, D::OMPD_distribute_parallel_do};
-  case D::OMPD_teams_distribute_parallel_do_simd:
-    return {D::OMPD_teams, D::OMPD_distribute_parallel_do_simd};
-  case D::OMPD_teams_distribute_simd:
-    return {D::OMPD_teams, D::OMPD_distribute_simd};
-  case D::OMPD_parallel_loop:
-    return {D::OMPD_parallel, D::OMPD_loop};
-  case D::OMPD_target_parallel_loop:
-    return {D::OMPD_target, D::OMPD_parallel_loop};
-  case D::OMPD_target_teams_loop:
-    return {D::OMPD_target, D::OMPD_teams_loop};
-  case D::OMPD_teams_loop:
-    return {D::OMPD_teams, D::OMPD_loop};
-  default:
-    return {dir, std::nullopt};
-  }
-}
-
 //===----------------------------------------------------------------------===//
 // Op body generation helper structures and functions
 //===----------------------------------------------------------------------===//
@@ -555,11 +564,6 @@ struct OpWithBodyGenInfo {
       : converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc),
         eval(eval), dir(dir) {}
 
-  OpWithBodyGenInfo &setGenNested(bool value) {
-    genNested = value;
-    return *this;
-  }
-
   OpWithBodyGenInfo &setOuterCombined(bool value) {
     outerCombined = value;
     return *this;
@@ -600,8 +604,6 @@ struct OpWithBodyGenInfo {
   Fortran::lower::pft::Evaluation &eval;
   /// [in] leaf directive for which to generate the op body.
   llvm::omp::Directive dir;
-  /// [in] whether to generate FIR for nested evaluations
-  bool genNested = true;
   /// [in] is this an outer operation - prevents privatization.
   bool outerCombined = false;
   /// [in] list of clauses to process.
@@ -622,7 +624,9 @@ struct OpWithBodyGenInfo {
 ///
 /// \param [in]   op - the operation the body belongs to.
 /// \param [in] info - options controlling code-gen for the construction.
-static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) {
+static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
+                           const ConstructQueue &queue,
+                           ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder();
 
   auto insertMarker = [](fir::FirOpBuilder &builder) {
@@ -678,7 +682,10 @@ static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) {
     }
   }
 
-  if (info.genNested) {
+  if (ConstructQueue::iterator next = std::next(item); next != queue.end()) {
+    genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval,
+                   info.loc, queue, next);
+  } else {
     // genFIR(Evaluation&) tries to patch up unterminated blocks, causing
     // a lot of complications for our approach if the terminator generation
     // is delayed past this point. Insert a temporary terminator here, then
@@ -769,11 +776,12 @@ static void genBodyOfTargetDataOp(
     Fortran::lower::AbstractConverter &converter,
     Fortran::lower::SymMap &symTable,
     Fortran::semantics::SemanticsContext &semaCtx,
-    Fortran::lower::pft::Evaluation &eval, bool genNested,
-    mlir::omp::TargetDataOp &dataOp, llvm::ArrayRef<mlir::Type> useDeviceTypes,
+    Fortran::lower::pft::Evaluation &eval, mlir::omp::TargetDataOp &dataOp,
+    llvm::ArrayRef<mlir::Type> useDeviceTypes,
     llvm::ArrayRef<mlir::Location> useDeviceLocs,
     llvm::ArrayRef<const Fortran::semantics::Symbol *> useDeviceSymbols,
-    const mlir::Location &currentLocation) {
+    const mlir::Location &currentLocation, const ConstructQueue &queue,
+    ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   mlir::Region &region = dataOp.getRegion();
 
@@ -826,8 +834,13 @@ static void genBodyOfTargetDataOp(
 
   // Set the insertion point after the marker.
   firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
-  if (genNested)
+
+  if (ConstructQueue::iterator next = std::next(item); next != queue.end()) {
+    genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                   next);
+  } else {
     genNestedEvaluations(converter, eval);
+  }
 }
 
 // This functions creates a block for the body of the targetOp's region. It adds
@@ -836,12 +849,13 @@ static void
 genBodyOfTargetOp(Fortran::lower::AbstractConverter &converter,
                   Fortran::lower::SymMap &symTable,
                   Fortran::semantics::SemanticsContext &semaCtx,
-                  Fortran::lower::pft::Evaluation &eval, bool genNested,
+                  Fortran::lower::pft::Evaluation &eval,
                   mlir::omp::TargetOp &targetOp,
                   llvm::ArrayRef<const Fortran::semantics::Symbol *> mapSyms,
                   llvm::ArrayRef<mlir::Location> mapSymLocs,
                   llvm::ArrayRef<mlir::Type> mapSymTypes,
-                  const mlir::Location &currentLocation) {
+                  const mlir::Location &currentLocation,
+                  const ConstructQueue &queue, ConstructQueue::iterator item) {
   assert(mapSymTypes.size() == mapSymLocs.size());
 
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
@@ -981,15 +995,22 @@ genBodyOfTargetOp(Fortran::lower::AbstractConverter &converter,
 
   // Create the insertion point after the marker.
   firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
-  if (genNested)
+
+  if (ConstructQueue::iterator next = std::next(item); next != queue.end()) {
+    genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                   next);
+  } else {
     genNestedEvaluations(converter, eval);
+  }
 }
 
 template <typename OpTy, typename... Args>
-static OpTy genOpWithBody(OpWithBodyGenInfo &info, Args &&...args) {
+static OpTy genOpWithBody(const OpWithBodyGenInfo &info,
+                          const ConstructQueue &queue,
+                          ConstructQueue::iterator item, Args &&...args) {
   auto op = info.converter.getFirOpBuilder().create<OpTy>(
       info.loc, std::forward<Args>(args)...);
-  createBodyOfOp(*op, info);
+  createBodyOfOp(*op, info, queue, item);
   return op;
 }
 
@@ -1274,7 +1295,8 @@ static mlir::omp::BarrierOp
 genBarrierOp(Fortran::lower::AbstractConverter &converter,
              Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
-             Fortran::lower::pft::Evaluation &eval, mlir::Location loc) {
+             Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+             const ConstructQueue &queue, ConstructQueue::iterator item) {
   return converter.getFirOpBuilder().create<mlir::omp::BarrierOp>(loc);
 }
 
@@ -1282,8 +1304,9 @@ static mlir::omp::CriticalOp
 genCriticalOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
-              Fortran::lower::pft::Evaluation &eval, bool genNested,
-              mlir::Location loc, const List<Clause> &clauses,
+              Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item,
               const std::optional<Fortran::parser::Name> &name) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   mlir::FlatSymbolRefAttr nameAttr;
@@ -1306,17 +1329,17 @@ genCriticalOp(Fortran::lower::AbstractConverter &converter,
 
   return genOpWithBody<mlir::omp::CriticalOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
-                        llvm::omp::Directive::OMPD_critical)
-          .setGenNested(genNested),
-      nameAttr);
+                        llvm::omp::Directive::OMPD_critical),
+      queue, item, nameAttr);
 }
 
 static mlir::omp::DistributeOp
 genDistributeOp(Fortran::lower::AbstractConverter &converter,
                 Fortran::lower::SymMap &symTable,
                 Fortran::semantics::SemanticsContext &semaCtx,
-                Fortran::lower::pft::Evaluation &eval, bool genNested,
-                mlir::Location loc, const List<Clause> &clauses) {
+                Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+                const List<Clause> &clauses, const ConstructQueue &queue,
+                ConstructQueue::iterator item) {
   TODO(loc, "Distribute construct");
   return nullptr;
 }
@@ -1326,7 +1349,8 @@ genFlushOp(Fortran::lower::AbstractConverter &converter,
            Fortran::lower::SymMap &symTable,
            Fortran::semantics::SemanticsContext &semaCtx,
            Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-           const ObjectList &objects, const List<Clause> &clauses) {
+           const ObjectList &objects, const List<Clause> &clauses,
+           const ConstructQueue &queue, ConstructQueue::iterator item) {
   llvm::SmallVector<mlir::Value> operandRange;
   genFlushClauses(converter, semaCtx, objects, clauses, loc, operandRange);
 
@@ -1338,12 +1362,13 @@ static mlir::omp::MasterOp
 genMasterOp(Fortran::lower::AbstractConverter &converter,
             Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
-            Fortran::lower::pft::Evaluation &eval, bool genNested,
-            mlir::Location loc) {
+            Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+            const List<Clause> &clauses, const ConstructQueue &queue,
+            ConstructQueue::iterator item) {
   return genOpWithBody<mlir::omp::MasterOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
-                        llvm::omp::Directive::OMPD_master)
-          .setGenNested(genNested));
+                        llvm::omp::Directive::OMPD_master),
+      queue, item);
 }
 
 static mlir::omp::OrderedOp
@@ -1351,7 +1376,8 @@ genOrderedOp(Fortran::lower::AbstractConverter &converter,
              Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
              Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-             const List<Clause> &clauses) {
+             const List<Clause> &clauses, const ConstructQueue &queue,
+             ConstructQueue::iterator item) {
   TODO(loc, "OMPD_ordered");
   return nullptr;
 }
@@ -1360,25 +1386,25 @@ static mlir::omp::OrderedRegionOp
 genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
                    Fortran::lower::SymMap &symTable,
                    Fortran::semantics::SemanticsContext &semaCtx,
-                   Fortran::lower::pft::Evaluation &eval, bool genNested,
-                   mlir::Location loc, const List<Clause> &clauses) {
+                   Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+                   const List<Clause> &clauses, const ConstructQueue &queue,
+                   ConstructQueue::iterator item) {
   mlir::omp::OrderedRegionClauseOps clauseOps;
   genOrderedRegionClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::OrderedRegionOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
-                        llvm::omp::Directive::OMPD_ordered)
-          .setGenNested(genNested),
-      clauseOps);
+                        llvm::omp::Directive::OMPD_ordered),
+      queue, item, clauseOps);
 }
 
 static mlir::omp::ParallelOp
 genParallelOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
-              Fortran::lower::pft::Evaluation &eval, bool genNested,
-              mlir::Location loc, const List<Clause> &clauses,
-              bool outerCombined = false) {
+              Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item, bool outerCombined = false) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   Fortran::lower::StatementContext stmtCtx;
   mlir::omp::ParallelClauseOps clauseOps;
@@ -1397,14 +1423,14 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
   OpWithBodyGenInfo genInfo =
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_parallel)
-          .setGenNested(genNested)
           .setOuterCombined(outerCombined)
           .setClauses(&clauses)
           .setReductions(&reductionSyms, &reductionTypes)
           .setGenRegionEntryCb(reductionCallback);
 
   if (!enableDelayedPrivatization)
-    return genOpWithBody<mlir::omp::ParallelOp>(genInfo, clauseOps);
+    return genOpWithBody<mlir::omp::ParallelOp>(genInfo, queue, item,
+                                                clauseOps);
 
   bool privatize = !outerCombined;
   DataSharingProcessor dsp(converter, semaCtx, clauses, eval,
@@ -1447,22 +1473,23 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
 
   // TODO Merge with the reduction CB.
   genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp);
-  return genOpWithBody<mlir::omp::ParallelOp>(genInfo, clauseOps);
+  return genOpWithBody<mlir::omp::ParallelOp>(genInfo, queue, item, clauseOps);
 }
 
 static mlir::omp::SectionOp
 genSectionOp(Fortran::lower::AbstractConverter &converter,
              Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
-             Fortran::lower::pft::Evaluation &eval, bool genNested,
-             mlir::Location loc, const List<Clause> &clauses) {
+             Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+             const List<Clause> &clauses, const ConstructQueue &queue,
+             ConstructQueue::iterator item) {
   // Currently only private/firstprivate clause is handled, and
   // all privatization is done within `omp.section` operations.
   return genOpWithBody<mlir::omp::SectionOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_section)
-          .setGenNested(genNested)
-          .setClauses(&clauses));
+          .setClauses(&clauses),
+      queue, item);
 }
 
 static mlir::omp::SectionsOp
@@ -1470,12 +1497,14 @@ genSectionsOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-              const mlir::omp::SectionsClauseOps &clauseOps) {
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item) {
+  mlir::omp::SectionsClauseOps clauseOps;
+  genSectionsClauses(converter, semaCtx, clauses, loc, clauseOps);
   return genOpWithBody<mlir::omp::SectionsOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
-                        llvm::omp::Directive::OMPD_sections)
-          .setGenNested(false),
-      clauseOps);
+                        llvm::omp::Directive::OMPD_sections),
+      queue, item, clauseOps);
 }
 
 static mlir::omp::SimdOp
@@ -1483,7 +1512,8 @@ genSimdOp(Fortran::lower::AbstractConverter &converter,
           Fortran::lower::SymMap &symTable,
           Fortran::semantics::SemanticsContext &semaCtx,
           Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-          const List<Clause> &clauses) {
+          const List<Clause> &clauses, const ConstructQueue &queue,
+          ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   DataSharingProcessor dsp(converter, semaCtx, clauses, eval);
   dsp.processStep1();
@@ -1518,7 +1548,8 @@ genSimdOp(Fortran::lower::AbstractConverter &converter,
                                    *nestedEval, llvm::omp::Directive::OMPD_simd)
                      .setClauses(&clauses)
                      .setDataSharingProcessor(&dsp)
-                     .setGenRegionEntryCb(ivCallback));
+                     .setGenRegionEntryCb(ivCallback),
+                 queue, item);
 
   return simdOp;
 }
@@ -1527,26 +1558,26 @@ static mlir::omp::SingleOp
 genSingleOp(Fortran::lower::AbstractConverter &converter,
             Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
-            Fortran::lower::pft::Evaluation &eval, bool genNested,
-            mlir::Location loc, const List<Clause> &clauses) {
+            Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+            const List<Clause> &clauses, const ConstructQueue &queue,
+            ConstructQueue::iterator item) {
   mlir::omp::SingleClauseOps clauseOps;
   genSingleClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::SingleOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_single)
-          .setGenNested(genNested)
           .setClauses(&clauses),
-      clauseOps);
+      queue, item, clauseOps);
 }
 
 static mlir::omp::TargetOp
 genTargetOp(Fortran::lower::AbstractConverter &converter,
             Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
-            Fortran::lower::pft::Evaluation &eval, bool genNested,
-            mlir::Location loc, const List<Clause> &clauses,
-            bool outerCombined = false) {
+            Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+            const List<Clause> &clauses, const ConstructQueue &queue,
+            ConstructQueue::iterator item, bool outerCombined = false) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   Fortran::lower::StatementContext stmtCtx;
 
@@ -1650,8 +1681,8 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
   Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap);
 
   auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
-  genBodyOfTargetOp(converter, symTable, semaCtx, eval, genNested, targetOp,
-                    mapSyms, mapLocs, mapTypes, loc);
+  genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, mapSyms,
+                    mapLocs, mapTypes, loc, queue, item);
   return targetOp;
 }
 
@@ -1659,8 +1690,9 @@ static mlir::omp::TargetDataOp
 genTargetDataOp(Fortran::lower::AbstractConverter &converter,
                 Fortran::lower::SymMap &symTable,
                 Fortran::semantics::SemanticsContext &semaCtx,
-                Fortran::lower::pft::Evaluation &eval, bool genNested,
-                mlir::Location loc, const List<Clause> &clauses) {
+                Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+                const List<Clause> &clauses, const ConstructQueue &queue,
+                ConstructQueue::iterator item) {
   Fortran::lower::StatementContext stmtCtx;
   mlir::omp::TargetDataClauseOps clauseOps;
   llvm::SmallVector<mlir::Type> useDeviceTypes;
@@ -1672,9 +1704,9 @@ genTargetDataOp(Fortran::lower::AbstractConverter &converter,
   auto targetDataOp =
       converter.getFirOpBuilder().create<mlir::omp::TargetDataOp>(loc,
                                                                   clauseOps);
-  genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, genNested,
-                        targetDataOp, useDeviceTypes, useDeviceLocs,
-                        useDeviceSyms, loc);
+  genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, targetDataOp,
+                        useDeviceTypes, useDeviceLocs, useDeviceSyms, loc,
+                        queue, item);
   return targetDataOp;
 }
 
@@ -1683,8 +1715,9 @@ static OpTy
 genTargetEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter,
                                Fortran::lower::SymMap &symTable,
                                Fortran::semantics::SemanticsContext &semaCtx,
-                               mlir::Location loc,
-                               const List<Clause> &clauses) {
+                               mlir::Location loc, const List<Clause> &clauses,
+                               const ConstructQueue &queue,
+                               ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   Fortran::lower::StatementContext stmtCtx;
 
@@ -1711,8 +1744,9 @@ static mlir::omp::TaskOp
 genTaskOp(Fortran::lower::AbstractConverter &converter,
           Fortran::lower::SymMap &symTable,
           Fortran::semantics::SemanticsContext &semaCtx,
-          Fortran::lower::pft::Evaluation &eval, bool genNested,
-          mlir::Location loc, const List<Clause> &clauses) {
+          Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+          const List<Clause> &clauses, const ConstructQueue &queue,
+          ConstructQueue::iterator item) {
   Fortran::lower::StatementContext stmtCtx;
   mlir::omp::TaskClauseOps clauseOps;
   genTaskClauses(converter, semaCtx, stmtCtx, clauses, loc, clauseOps);
@@ -1720,26 +1754,25 @@ genTaskOp(Fortran::lower::AbstractConverter &converter,
   return genOpWithBody<mlir::omp::TaskOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_task)
-          .setGenNested(genNested)
           .setClauses(&clauses),
-      clauseOps);
+      queue, item, clauseOps);
 }
 
 static mlir::omp::TaskgroupOp
 genTaskgroupOp(Fortran::lower::AbstractConverter &converter,
                Fortran::lower::SymMap &symTable,
                Fortran::semantics::SemanticsContext &semaCtx,
-               Fortran::lower::pft::Evaluation &eval, bool genNested,
-               mlir::Location loc, const List<Clause> &clauses) {
+               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+               const List<Clause> &clauses, const ConstructQueue &queue,
+               ConstructQueue::iterator item) {
   mlir::omp::TaskgroupClauseOps clauseOps;
   genTaskgroupClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::TaskgroupOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_taskgroup)
-          .setGenNested(genNested)
           .setClauses(&clauses),
-      clauseOps);
+      queue, item, clauseOps);
 }
 
 static mlir::omp::TaskloopOp
@@ -1747,7 +1780,8 @@ genTaskloopOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-              const List<Clause> &clauses) {
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item) {
   TODO(loc, "Taskloop construct");
 }
 
@@ -1756,7 +1790,8 @@ genTaskwaitOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-              const List<Clause> &clauses) {
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item) {
   mlir::omp::TaskwaitClauseOps clauseOps;
   genTaskwaitClauses(converter, semaCtx, clauses, loc, clauseOps);
   return converter.getFirOpBuilder().create<mlir::omp::TaskwaitOp>(loc,
@@ -1767,7 +1802,8 @@ static mlir::omp::TaskyieldOp
 genTaskyieldOp(Fortran::lower::AbstractConverter &converter,
                Fortran::lower::SymMap &symTable,
                Fortran::semantics::SemanticsContext &semaCtx,
-               Fortran::lower::pft::Evaluation &eval, mlir::Location loc) {
+               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+               const ConstructQueue &queue, ConstructQueue::iterator item) {
   return converter.getFirOpBuilder().create<mlir::omp::TaskyieldOp>(loc);
 }
 
@@ -1775,9 +1811,9 @@ static mlir::omp::TeamsOp
 genTeamsOp(Fortran::lower::AbstractConverter &converter,
            Fortran::lower::SymMap &symTable,
            Fortran::semantics::SemanticsContext &semaCtx,
-           Fortran::lower::pft::Evaluation &eval, bool genNested,
-           mlir::Location loc, const List<Clause> &clauses,
-           bool outerCombined = false) {
+           Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+           const List<Clause> &clauses, const ConstructQueue &queue,
+           ConstructQueue::iterator item, bool outerCombined = false) {
   Fortran::lower::StatementContext stmtCtx;
   mlir::omp::TeamsClauseOps clauseOps;
   genTeamsClauses(converter, semaCtx, stmtCtx, clauses, loc, clauseOps);
@@ -1785,10 +1821,9 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter,
   return genOpWithBody<mlir::omp::TeamsOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_teams)
-          .setGenNested(genNested)
           .setOuterCombined(outerCombined)
           .setClauses(&clauses),
-      clauseOps);
+      queue, item, clauseOps);
 }
 
 static mlir::omp::WsloopOp
@@ -1796,7 +1831,8 @@ genWsloopOp(Fortran::lower::AbstractConverter &converter,
             Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
             Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-            const List<Clause> &clauses) {
+            const List<Clause> &clauses, const ConstructQueue &queue,
+            ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   DataSharingProcessor dsp(converter, semaCtx, clauses, eval);
   dsp.processStep1();
@@ -1837,7 +1873,8 @@ genWsloopOp(Fortran::lower::AbstractConverter &converter,
                      .setClauses(&clauses)
                      .setDataSharingProcessor(&dsp)
                      .setReductions(&reductionSyms, &reductionTypes)
-                     .setGenRegionEntryCb(ivCallback));
+                     .setGenRegionEntryCb(ivCallback),
+                 queue, item);
   return wsloopOp;
 }
 
@@ -1845,13 +1882,13 @@ genWsloopOp(Fortran::lower::AbstractConverter &converter,
 // Code generation functions for composite constructs
 //===----------------------------------------------------------------------===//
 
-static void
-genCompositeDistributeParallelDo(Fortran::lower::AbstractConverter &converter,
-                                 Fortran::lower::SymMap &symTable,
-                                 Fortran::semantics::SemanticsContext &semaCtx,
-                                 Fortran::lower::pft::Evaluation &eval,
-                                 const List<Clause> &clauses,
-                                 mlir::Location loc) {
+static void genCompositeDistributeParallelDo(
+    Fortran::lower::AbstractConverter &converter,
+    Fortran::lower::SymMap &symTable,
+    Fortran::semantics::SemanticsContext &semaCtx,
+    Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+    const List<Clause> &clauses, const ConstructQueue &queue,
+    ConstructQueue::iterator item) {
   TODO(loc, "Composite DISTRIBUTE PARALLEL DO");
 }
 
@@ -1859,8 +1896,9 @@ static void genCompositeDistributeParallelDoSimd(
     Fortran::lower::AbstractConverter &converter,
     Fortran::lower::SymMap &symTable,
     Fortran::semantics::SemanticsContext &semaCtx,
-    Fortran::lower::pft::Evaluation &eval, const List<Clause> &clauses,
-    mlir::Location loc) {
+    Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+    const List<Clause> &clauses, const ConstructQueue &queue,
+    ConstructQueue::iterator item) {
   TODO(loc, "Composite DISTRIBUTE PARALLEL DO SIMD");
 }
 
@@ -1869,7 +1907,9 @@ genCompositeDistributeSimd(Fortran::lower::AbstractConverter &converter,
                            Fortran::lower::SymMap &symTable,
                            Fortran::semantics::SemanticsContext &semaCtx,
                            Fortran::lower::pft::Evaluation &eval,
-                           const List<Clause> &clauses, mlir::Location loc) {
+                           mlir::Location loc, const List<Clause> &clauses,
+                           const ConstructQueue &queue,
+                           ConstructQueue::iterator item) {
   TODO(loc, "Composite DISTRIBUTE SIMD");
 }
 
@@ -1877,8 +1917,9 @@ static void genCompositeDoSimd(Fortran::lower::AbstractConverter &converter,
                                Fortran::lower::SymMap &symTable,
                                Fortran::semantics::SemanticsContext &semaCtx,
                                Fortran::lower::pft::Evaluation &eval,
-                               const List<Clause> &clauses,
-                               mlir::Location loc) {
+                               mlir::Location loc, const List<Clause> &clauses,
+                               const ConstructQueue &queue,
+                               ConstructQueue::iterator item) {
   ClauseProcessor cp(converter, semaCtx, clauses);
   cp.processTODO<clause::Aligned, clause::Allocate, clause::Linear,
                  clause::Order, clause::Safelen, clause::Simdlen>(
@@ -1891,7 +1932,7 @@ static void genCompositeDoSimd(Fortran::lower::AbstractConverter &converter,
   // When support for vectorization is enabled, then we need to add handling of
   // if clause. Currently if clause can be skipped because we always assume
   // SIMD length = 1.
-  genWsloopOp(converter, symTable, semaCtx, eval, loc, clauses);
+  genWsloopOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
 }
 
 static void
@@ -1899,10 +1940,126 @@ genCompositeTaskloopSimd(Fortran::lower::AbstractConverter &converter,
                          Fortran::lower::SymMap &symTable,
                          Fortran::semantics::SemanticsContext &semaCtx,
                          Fortran::lower::pft::Evaluation &eval,
-                         const List<Clause> &clauses, mlir::Location loc) {
+                         mlir::Location loc, const List<Clause> &clauses,
+                         const ConstructQueue &queue,
+                         ConstructQueue::iterator item) {
   TODO(loc, "Composite TASKLOOP SIMD");
 }
 
+//===----------------------------------------------------------------------===//
+// Dispatch
+//===----------------------------------------------------------------------===//
+
+static void genOMPDispatch(Fortran::lower::AbstractConverter &converter,
+                           Fortran::lower::SymMap &symTable,
+                           Fortran::semantics::SemanticsContext &semaCtx,
+                           Fortran::lower::pft::Evaluation &eval,
+                           mlir::Location loc, const ConstructQueue &queue,
+                           ConstructQueue::iterator item) {
+  assert(item != queue.end());
+  const List<Clause> &clauses = item->clauses;
+
+  switch (llvm::omp::Directive dir = item->id) {
+  case llvm::omp::Directive::OMPD_distribute:
+    genDistributeOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                    item);
+    break;
+  case llvm::omp::Directive::OMPD_do:
+    genWsloopOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_loop:
+  case llvm::omp::Directive::OMPD_masked:
+    TODO(loc, "Unhandled loop directive (" +
+                  llvm::omp::getOpenMPDirectiveName(dir) + ")");
+    break;
+  case llvm::omp::Directive::OMPD_master:
+    genMasterOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_ordered:
+    genOrderedRegionOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                       item);
+    break;
+  case llvm::omp::Directive::OMPD_parallel:
+    genParallelOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item,
+                  /*outerCombined=*/false);
+    break;
+  case llvm::omp::Directive::OMPD_sections:
+    genSectionsOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                  item);
+    break;
+  case llvm::omp::Directive::OMPD_simd:
+    genSimdOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_single:
+    genSingleOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_target:
+    genTargetOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item,
+                /*outerCombined=*/false);
+    break;
+  case llvm::omp::Directive::OMPD_target_data:
+    genTargetDataOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                    item);
+    break;
+  case llvm::omp::Directive::OMPD_target_enter_data:
+    genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>(
+        converter, symTable, semaCtx, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_target_exit_data:
+    genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>(
+        converter, symTable, semaCtx, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_target_update:
+    genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>(
+        converter, symTable, semaCtx, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_task:
+    genTaskOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_taskgroup:
+    genTaskgroupOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                   item);
+    break;
+  case llvm::omp::Directive::OMPD_taskloop:
+    genTaskloopOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                  item);
+    break;
+  case llvm::omp::Directive::OMPD_teams:
+    genTeamsOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  // case llvm::omp::Directive::OMPD_workdistribute:
+  case llvm::omp::Directive::OMPD_workshare:
+    // FIXME: Workshare is not a commonly used OpenMP construct, an
+    // implementation for this feature will come later. For the codes
+    // that use this construct, add a single construct for now.
+    genSingleOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  // Composite constructs
+  case llvm::omp::Directive::OMPD_distribute_parallel_do:
+    genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc,
+                                     clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_distribute_parallel_do_simd:
+    genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval,
+                                         loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_distribute_simd:
+    genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, clauses,
+                               queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_do_simd:
+    genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                       item);
+    break;
+  case llvm::omp::Directive::OMPD_taskloop_simd:
+    genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, clauses,
+                             queue, item);
+    break;
+  default:
+    break;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // OpenMPDeclarativeConstruct visitors
 //===----------------------------------------------------------------------===//
@@ -2013,36 +2170,45 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
       semaCtx);
   mlir::Location currentLocation = converter.genLocation(directive.source);
 
+  ConstructQueue queue{{DirectiveWithClauses{directive.v, clauses}}};
+
   switch (directive.v) {
   default:
     break;
   case llvm::omp::Directive::OMPD_barrier:
-    genBarrierOp(converter, symTable, semaCtx, eval, currentLocation);
+    genBarrierOp(converter, symTable, semaCtx, eval, currentLocation, queue,
+                 queue.begin());
     break;
   case llvm::omp::Directive::OMPD_taskwait:
-    genTaskwaitOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
+    genTaskwaitOp(converter, symTable, semaCtx, eval, currentLocation, clauses,
+                  queue, queue.begin());
     break;
   case llvm::omp::Directive::OMPD_taskyield:
-    genTaskyieldOp(converter, symTable, semaCtx, eval, currentLocation);
+    genTaskyieldOp(converter, symTable, semaCtx, eval, currentLocation, queue,
+                   queue.begin());
     break;
   case llvm::omp::Directive::OMPD_target_data:
-    genTargetDataOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
-                    currentLocation, clauses);
+    genTargetDataOp(converter, symTable, semaCtx, eval, currentLocation,
+                    clauses, queue, queue.begin());
     break;
   case llvm::omp::Directive::OMPD_target_enter_data:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>(
-        converter, symTable, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses, queue,
+        queue.begin());
     break;
   case llvm::omp::Directive::OMPD_target_exit_data:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>(
-        converter, symTable, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses, queue,
+        queue.begin());
     break;
   case llvm::omp::Directive::OMPD_target_update:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>(
-        converter, symTable, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses, queue,
+        queue.begin());
     break;
   case llvm::omp::Directive::OMPD_ordered:
-    genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
+    genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, clauses,
+                 queue, queue.begin());
     break;
   }
 }
@@ -2066,8 +2232,11 @@ genOMP(Fortran::lower::AbstractConverter &converter,
                             [&](auto &&s) { return makeClause(s.v, semaCtx); })
                  : List<Clause>{};
   mlir::Location currentLocation = converter.genLocation(verbatim.source);
+
+  ConstructQueue queue{
+      DirectiveWithClauses{llvm::omp::Directive::OMPD_flush, clauses}};
   genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects,
-             clauses);
+             clauses, queue, queue.begin());
 }
 
 static void
@@ -2210,75 +2379,13 @@ genOMP(Fortran::lower::AbstractConverter &converter,
     }
   }
 
-  std::optional<llvm::omp::Directive> nextDir = origDirective;
-  bool outermostLeafConstruct = true;
-  while (nextDir) {
-    llvm::omp::Directive leafDir;
-    std::tie(leafDir, nextDir) = splitCombinedDirective(*nextDir);
-    const bool genNested = !nextDir;
-    const bool outerCombined = outermostLeafConstruct && nextDir.has_value();
-    switch (leafDir) {
-    case llvm::omp::Directive::OMPD_master:
-      // 2.16 MASTER construct.
-      genMasterOp(converter, symTable, semaCtx, eval, genNested,
-                  currentLocation);
-      break;
-    case llvm::omp::Directive::OMPD_ordered:
-      // 2.17.9 ORDERED construct.
-      genOrderedRegionOp(converter, symTable, semaCtx, eval, genNested,
-                         currentLocation, clauses);
-      break;
-    case llvm::omp::Directive::OMPD_parallel:
-      // 2.6 PARALLEL construct.
-      genParallelOp(converter, symTable, semaCtx, eval, genNested,
-                    currentLocation, clauses, outerCombined);
-      break;
-    case llvm::omp::Directive::OMPD_single:
-      // 2.8.2 SINGLE construct.
-      genSingleOp(converter, symTable, semaCtx, eval, genNested,
-                  currentLocation, clauses);
-      break;
-    case llvm::omp::Directive::OMPD_target:
-      // 2.12.5 TARGET construct.
-      genTargetOp(converter, symTable, semaCtx, eval, genNested,
-                  currentLocation, clauses, outerCombined);
-      break;
-    case llvm::omp::Directive::OMPD_target_data:
-      // 2.12.2 TARGET DATA construct.
-      genTargetDataOp(converter, symTable, semaCtx, eval, genNested,
-                      currentLocation, clauses);
-      break;
-    case llvm::omp::Directive::OMPD_task:
-      // 2.10.1 TASK construct.
-      genTaskOp(converter, symTable, semaCtx, eval, genNested, currentLocation,
-                clauses);
-      break;
-    case llvm::omp::Directive::OMPD_taskgroup:
-      // 2.17.6 TASKGROUP construct.
-      genTaskgroupOp(converter, symTable, semaCtx, eval, genNested,
-                     currentLocation, clauses);
-      break;
-    case llvm::omp::Directive::OMPD_teams:
-      // 2.7 TEAMS construct.
-      // FIXME Pass the outerCombined argument or rename it to better describe
-      // what it represents if it must always be `false` in this context.
-      genTeamsOp(converter, symTable, semaCtx, eval, genNested, currentLocation,
-                 clauses);
-      break;
-    case llvm::omp::Directive::OMPD_workshare:
-      // 2.8.3 WORKSHARE construct.
-      // FIXME: Workshare is not a commonly used OpenMP construct, an
-      // implementation for this feature will come later. For the codes
-      // that use this construct, add a single construct for now.
-      genSingleOp(converter, symTable, semaCtx, eval, genNested,
-                  currentLocation, clauses);
-      break;
-    default:
-      llvm_unreachable("Unexpected block construct");
-      break;
-    }
-    outermostLeafConstruct = false;
-  }
+  llvm::omp::Directive directive =
+      std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v;
+  ConstructQueue queue;
+  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
+                         directive, clauses, queue);
+  genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                 queue.begin());
 }
 
 static void
@@ -2291,10 +2398,15 @@ genOMP(Fortran::lower::AbstractConverter &converter,
       std::get<Fortran::parser::OmpCriticalDirective>(criticalConstruct.t);
   List<Clause> clauses =
       makeClauses(std::get<Fortran::parser::OmpClauseList>(cd.t), semaCtx);
+
+  ConstructQueue queue;
+  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
+                         llvm::omp::Directive::OMPD_critical, clauses, queue);
+
   const auto &name = std::get<std::optional<Fortran::parser::Name>>(cd.t);
   mlir::Location currentLocation = converter.getCurrentLocation();
-  genCriticalOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
-                currentLocation, clauses, name);
+  genCriticalOp(converter, symTable, semaCtx, eval, currentLocation, clauses,
+                queue, queue.begin(), name);
 }
 
 static void
@@ -2315,14 +2427,6 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
       std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t);
   List<Clause> clauses = makeClauses(
       std::get<Fortran::parser::OmpClauseList>(beginLoopDirective.t), semaCtx);
-  mlir::Location currentLocation =
-      converter.genLocation(beginLoopDirective.source);
-  const auto origDirective =
-      std::get<Fortran::parser::OmpLoopDirective>(beginLoopDirective.t).v;
-
-  assert(llvm::omp::loopConstructSet.test(origDirective) &&
-         "Expected loop construct");
-
   if (auto &endLoopDirective =
           std::get<std::optional<Fortran::parser::OmpEndLoopDirective>>(
               loopConstruct.t)) {
@@ -2331,101 +2435,16 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
         semaCtx));
   }
 
-  std::optional<llvm::omp::Directive> nextDir = origDirective;
-  while (nextDir) {
-    llvm::omp::Directive leafDir;
-    std::tie(leafDir, nextDir) = splitCombinedDirective(*nextDir);
-    if (llvm::omp::compositeConstructSet.test(leafDir)) {
-      assert(!nextDir && "Composite construct cannot be split");
-      switch (leafDir) {
-      case llvm::omp::Directive::OMPD_distribute_parallel_do:
-        // 2.9.4.3 DISTRIBUTE PARALLEL Worksharing-Loop construct.
-        genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval,
-                                         clauses, currentLocation);
-        break;
-      case llvm::omp::Directive::OMPD_distribute_parallel_do_simd:
-        // 2.9.4.4 DISTRIBUTE PARALLEL Worksharing-Loop SIMD construct.
-        genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval,
-                                             clauses, currentLocation);
-        break;
-      case llvm::omp::Directive::OMPD_distribute_simd:
-        // 2.9.4.2 DISTRIBUTE SIMD construct.
-        genCompositeDistributeSimd(converter, symTable, semaCtx, eval, clauses,
-                                   currentLocation);
-        break;
-      case llvm::omp::Directive::OMPD_do_simd:
-        // 2.9.3.2 Worksharing-Loop SIMD construct.
-        genCompositeDoSimd(converter, symTable, semaCtx, eval, clauses,
-                           currentLocation);
-        break;
-      case llvm::omp::Directive::OMPD_taskloop_simd:
-        // 2.10.3 TASKLOOP SIMD construct.
-        genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, clauses,
-                                 currentLocation);
-        break;
-      default:
-        llvm_unreachable("Unexpected composite construct");
-      }
-    } else {
-      const bool genNested = !nextDir;
-      switch (leafDir) {
-      case llvm::omp::Directive::OMPD_distribute:
-        // 2.9.4.1 DISTRIBUTE construct.
-        genDistributeOp(converter, symTable, semaCtx, eval, genNested,
-                        currentLocation, clauses);
-        break;
-      case llvm::omp::Directive::OMPD_do:
-        // 2.9.2 Worksharing-Loop construct.
-        genWsloopOp(converter, symTable, semaCtx, eval, currentLocation,
-                    clauses);
-        break;
-      case llvm::omp::Directive::OMPD_parallel:
-        // 2.6 PARALLEL construct.
-        // FIXME This is not necessarily always the outer leaf construct of a
-        // combined construct in this constext (e.g. distribute parallel do).
-        // Maybe rename the argument if it represents something else or
-        // initialize it properly.
-        genParallelOp(converter, symTable, semaCtx, eval, genNested,
-                      currentLocation, clauses,
-                      /*outerCombined=*/true);
-        break;
-      case llvm::omp::Directive::OMPD_simd:
-        // 2.9.3.1 SIMD construct.
-        genSimdOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
-        break;
-      case llvm::omp::Directive::OMPD_target:
-        // 2.12.5 TARGET construct.
-        genTargetOp(converter, symTable, semaCtx, eval, genNested,
-                    currentLocation, clauses, /*outerCombined=*/true);
-        break;
-      case llvm::omp::Directive::OMPD_taskloop:
-        // 2.10.2 TASKLOOP construct.
-        genTaskloopOp(converter, symTable, semaCtx, eval, currentLocation,
-                      clauses);
-        break;
-      case llvm::omp::Directive::OMPD_teams:
-        // 2.7 TEAMS construct.
-        // FIXME This is not necessarily always the outer leaf construct of a
-        // combined construct in this constext (e.g. target teams distribute).
-        // Maybe rename the argument if it represents something else or
-        // initialize it properly.
-        genTeamsOp(converter, symTable, semaCtx, eval, genNested,
-                   currentLocation, clauses, /*outerCombined=*/true);
-        break;
-      case llvm::omp::Directive::OMPD_loop:
-      case llvm::omp::Directive::OMPD_masked:
-      case llvm::omp::Directive::OMPD_master:
-      case llvm::omp::Directive::OMPD_tile:
-      case llvm::omp::Directive::OMPD_unroll:
-        TODO(currentLocation, "Unhandled loop directive (" +
-                                  llvm::omp::getOpenMPDirectiveName(leafDir) +
-                                  ")");
-        break;
-      default:
-        llvm_unreachable("Unexpected loop construct");
-      }
-    }
-  }
+  mlir::Location currentLocation =
+      converter.genLocation(beginLoopDirective.source);
+
+  llvm::omp::Directive directive =
+      std::get<parser::OmpLoopDirective>(beginLoopDirective.t).v;
+  ConstructQueue queue;
+  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
+                         directive, clauses, queue);
+  genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                 queue.begin());
 }
 
 static void
@@ -2434,8 +2453,11 @@ genOMP(Fortran::lower::AbstractConverter &converter,
        Fortran::semantics::SemanticsContext &semaCtx,
        Fortran::lower::pft::Evaluation &eval,
        const Fortran::parser::OpenMPSectionConstruct &sectionConstruct) {
-  // SECTION constructs are handled as a part of SECTIONS.
-  llvm_unreachable("Unexpected standalone OMP SECTION");
+  mlir::Location loc = converter.getCurrentLocation();
+  ConstructQueue queue{
+      DirectiveWithClauses{llvm::omp::Directive::OMPD_section}};
+  genSectionOp(converter, symTable, semaCtx, eval, loc,
+               /*clauses=*/{}, queue, queue.begin());
 }
 
 static void
@@ -2454,39 +2476,15 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   clauses.append(makeClauses(
       std::get<Fortran::parser::OmpClauseList>(endSectionsDirective.t),
       semaCtx));
-
-  // Process clauses before optional omp.parallel, so that new variables are
-  // allocated outside of the parallel region
   mlir::Location currentLocation = converter.getCurrentLocation();
-  mlir::omp::SectionsClauseOps clauseOps;
-  genSectionsClauses(converter, semaCtx, clauses, currentLocation, clauseOps);
-
-  // Parallel wrapper of PARALLEL SECTIONS construct
-  llvm::omp::Directive dir =
-      std::get<Fortran::parser::OmpSectionsDirective>(beginSectionsDirective.t)
-          .v;
-  if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
-    genParallelOp(converter, symTable, semaCtx, eval,
-                  /*genNested=*/false, currentLocation, clauses,
-                  /*outerCombined=*/true);
-  }
 
-  // SECTIONS construct.
-  genSectionsOp(converter, symTable, semaCtx, eval, currentLocation, clauseOps);
-
-  // Generate nested SECTION operations recursively.
-  const auto &sectionBlocks =
-      std::get<Fortran::parser::OmpSectionBlocks>(sectionsConstruct.t);
-  auto &firOpBuilder = converter.getFirOpBuilder();
-  auto ip = firOpBuilder.saveInsertionPoint();
-  for (const auto &[nblock, neval] :
-       llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) {
-    symTable.pushScope();
-    genSectionOp(converter, symTable, semaCtx, neval, /*genNested=*/true,
-                 currentLocation, clauses);
-    symTable.popScope();
-    firOpBuilder.restoreInsertionPoint(ip);
-  }
+  llvm::omp::Directive directive =
+      std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).v;
+  ConstructQueue queue;
+  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
+                         directive, clauses, queue);
+  genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                 queue.begin());
 }
 
 static void genOMP(Fortran::lower::AbstractConverter &converter,
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
new file mode 100644
index 00000000000000..f8a18a1bed7ae6
--- /dev/null
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -0,0 +1,985 @@
+#ifndef LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
+#define LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Frontend/OpenMP/ClauseT.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
+
+#include <iterator>
+#include <list>
+#include <optional>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <variant>
+
+static inline llvm::ArrayRef<llvm::omp::Directive> getWorksharing() {
+  static llvm::omp::Directive worksharing[] = {
+      llvm::omp::Directive::OMPD_do,     llvm::omp::Directive::OMPD_for,
+      llvm::omp::Directive::OMPD_scope,  llvm::omp::Directive::OMPD_sections,
+      llvm::omp::Directive::OMPD_single, llvm::omp::Directive::OMPD_workshare,
+  };
+  return worksharing;
+}
+
+static inline llvm::ArrayRef<llvm::omp::Directive> getWorksharingLoop() {
+  static llvm::omp::Directive worksharingLoop[] = {
+      llvm::omp::Directive::OMPD_do,
+      llvm::omp::Directive::OMPD_for,
+  };
+  return worksharingLoop;
+}
+
+namespace detail {
+template <typename Container, typename Predicate>
+typename std::remove_reference_t<Container>::iterator
+find_unique(Container &&container, Predicate &&pred) {
+  auto first = std::find_if(container.begin(), container.end(), pred);
+  if (first == container.end())
+    return first;
+  auto second = std::find_if(std::next(first), container.end(), pred);
+  if (second == container.end())
+    return first;
+  return container.end();
+}
+} // namespace detail
+
+namespace tomp {
+
+template <typename ClauseType> struct DirectiveWithClauses {
+  llvm::omp::Directive id = llvm::omp::Directive::OMPD_unknown;
+  tomp::type::ListT<ClauseType> clauses;
+};
+
+template <typename ClauseType, typename HelperType>
+struct ConstructDecompositionT {
+  using ClauseTy = ClauseType;
+
+  using TypeTy = typename ClauseTy::TypeTy;
+  using IdTy = typename ClauseTy::IdTy;
+  using ExprTy = typename ClauseTy::ExprTy;
+  using HelperTy = HelperType;
+  using ObjectTy = tomp::ObjectT<IdTy, ExprTy>;
+
+  using ClauseSet = llvm::DenseSet<const ClauseTy *>;
+
+  ConstructDecompositionT(uint32_t ver, HelperType &hlp,
+                          llvm::omp::Directive dir,
+                          llvm::ArrayRef<ClauseTy> clauses)
+      : version(ver), construct(dir), helper(hlp) {
+    for (const ClauseTy &clause : clauses)
+      nodes.push_back(&clause);
+
+    bool success = split();
+    if (success) {
+      // Copy the broken down directives with their clauses to the
+      // output list. Copy by value, since we don't own the storage
+      // with the input clauses, and the internal representation uses
+      // clause addresses.
+      for (auto &leaf : leafs) {
+        output.push_back({leaf.id});
+        auto &dwc = output.back();
+        for (const ClauseTy *c : leaf.clauses)
+          dwc.clauses.push_back(*c);
+      }
+    }
+  }
+
+  tomp::ListT<DirectiveWithClauses<ClauseType>> output;
+
+private:
+  bool split();
+
+  struct LeafReprInternal {
+    llvm::omp::Directive id = llvm::omp::Directive::OMPD_unknown;
+    tomp::type::ListT<const ClauseTy *> clauses;
+  };
+
+  LeafReprInternal *findDirective(llvm::omp::Directive dirId) {
+    auto found = llvm::find_if(
+        leafs, [&](const LeafReprInternal &leaf) { return leaf.id == dirId; });
+    return found != leafs.end() ? &*found : nullptr;
+  }
+
+  ClauseSet *findClausesWith(const ObjectTy &object) {
+    if (auto found = syms.find(object.id()); found != syms.end())
+      return &found->second;
+    return nullptr;
+  }
+
+  template <typename S>
+  ClauseTy *makeClause(llvm::omp::Clause clauseId, S &&specific) {
+    implicit.push_back(ClauseTy{clauseId, std::move(specific)});
+    return &implicit.back();
+  }
+
+  void addClauseSymsToMap(const ObjectTy &object, const ClauseTy *);
+  void addClauseSymsToMap(const tomp::ObjectListT<IdTy, ExprTy> &objects,
+                          const ClauseTy *);
+  void addClauseSymsToMap(const TypeTy &item, const ClauseTy *);
+  void addClauseSymsToMap(const ExprTy &item, const ClauseTy *);
+  void addClauseSymsToMap(const tomp::clause::MapT<TypeTy, IdTy, ExprTy> &item,
+                          const ClauseTy *);
+
+  template <typename U>
+  void addClauseSymsToMap(const std::optional<U> &item, const ClauseTy *);
+  template <typename U>
+  void addClauseSymsToMap(const tomp::ListT<U> &item, const ClauseTy *);
+  template <typename... U, size_t... Is>
+  void addClauseSymsToMap(const std::tuple<U...> &item, const ClauseTy *,
+                          std::index_sequence<Is...> = {});
+  template <typename U>
+  std::enable_if_t<std::is_enum_v<llvm::remove_cvref_t<U>>, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::EmptyTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::IncompleteTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::WrapperTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::TupleTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::UnionTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  // Apply a clause to the only directive that allows it. If there are no
+  // directives that allow it, or if there is more that one, do not apply
+  // anything and return false, otherwise return true.
+  bool applyToUnique(const ClauseTy *node);
+
+  // Apply a clause to the first directive in given range that allows it.
+  // If such a directive does not exist, return false, otherwise return true.
+  template <typename Iterator>
+  bool applyToFirst(const ClauseTy *node, llvm::iterator_range<Iterator> range);
+
+  // Apply a clause to the innermost directive that allows it. If such a
+  // directive does not exist, return false, otherwise return true.
+  bool applyToInnermost(const ClauseTy *node);
+
+  // Apply a clause to the outermost directive that allows it. If such a
+  // directive does not exist, return false, otherwise return true.
+  bool applyToOutermost(const ClauseTy *node);
+
+  template <typename Predicate>
+  bool applyIf(const ClauseTy *node, Predicate shouldApply);
+
+  bool applyToAll(const ClauseTy *node);
+
+  template <typename Clause>
+  bool applyClause(Clause &&clause, const ClauseTy *node);
+
+  bool applyClause(const tomp::clause::CollapseT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::PrivateT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool
+  applyClause(const tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy> &clause,
+              const ClauseTy *);
+  bool
+  applyClause(const tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy> &clause,
+              const ClauseTy *);
+  bool applyClause(const tomp::clause::SharedT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::DefaultT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool
+  applyClause(const tomp::clause::ThreadLimitT<TypeTy, IdTy, ExprTy> &clause,
+              const ClauseTy *);
+  bool applyClause(const tomp::clause::OrderT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::AllocateT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::ReductionT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::IfT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::LinearT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::NowaitT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+
+  uint32_t version;
+  llvm::omp::Directive construct;
+  HelperType &helper;
+  ListT<LeafReprInternal> leafs;
+  tomp::ListT<const ClauseTy *> nodes;
+  std::list<ClauseTy> implicit; // Container for materialized implicit clauses.
+                                // Inserting must preserve element addresses.
+  llvm::DenseMap<IdTy, ClauseSet> syms;
+  llvm::DenseSet<IdTy> mapBases;
+};
+
+// Deduction guide
+template <typename ClauseType, typename HelperType>
+ConstructDecompositionT(uint32_t, HelperType &, llvm::omp::Directive,
+                        llvm::ArrayRef<ClauseType>)
+    -> ConstructDecompositionT<ClauseType, HelperType>;
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(const ObjectTy &object,
+                                                       const ClauseTy *node) {
+  syms[object.id()].insert(node);
+}
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const tomp::ObjectListT<IdTy, ExprTy> &objects, const ClauseTy *node) {
+  for (auto &object : objects)
+    syms[object.id()].insert(node);
+}
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(const TypeTy &item,
+                                                       const ClauseTy *node) {
+  // Nothing to do for types.
+}
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(const ExprTy &item,
+                                                       const ClauseTy *node) {
+  // Nothing to do for expressions.
+}
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const tomp::clause::MapT<TypeTy, IdTy, ExprTy> &item,
+    const ClauseTy *node) {
+  auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(item.t);
+  addClauseSymsToMap(objects, node);
+  for (auto &object : objects) {
+    if (auto base = helper.getBaseObject(object))
+      mapBases.insert(base->id());
+  }
+}
+
+template <typename C, typename H>
+template <typename U>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const std::optional<U> &item, const ClauseTy *node) {
+  if (item)
+    addClauseSymsToMap(*item, node);
+}
+
+template <typename C, typename H>
+template <typename U>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const tomp::ListT<U> &item, const ClauseTy *node) {
+  for (auto &s : item)
+    addClauseSymsToMap(s, node);
+}
+
+template <typename C, typename H>
+template <typename... U, size_t... Is>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const std::tuple<U...> &item, const ClauseTy *node,
+    std::index_sequence<Is...>) {
+  (void)node; // Silence strange warning from GCC.
+  (addClauseSymsToMap(std::get<Is>(item), node), ...);
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<std::is_enum_v<llvm::remove_cvref_t<U>>, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  // Nothing to do for enums.
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::EmptyTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  // Nothing to do for an empty class.
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::IncompleteTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  // Nothing to do for an incomplete class (they're empty).
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::WrapperTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  addClauseSymsToMap(item.v, node);
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::TupleTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  constexpr size_t tuple_size =
+      std::tuple_size_v<llvm::remove_cvref_t<decltype(item.t)>>;
+  addClauseSymsToMap(item.t, node, std::make_index_sequence<tuple_size>{});
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::UnionTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  std::visit([&](auto &&s) { addClauseSymsToMap(s, node); }, item.u);
+}
+
+// Apply a clause to the only directive that allows it. If there are no
+// directives that allow it, or if there is more that one, do not apply
+// anything and return false, otherwise return true.
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyToUnique(const ClauseTy *node) {
+  auto unique = detail::find_unique(leafs, [=](const auto &dirInfo) {
+    return llvm::omp::isAllowedClauseForDirective(dirInfo.id, node->id,
+                                                  version);
+  });
+
+  if (unique != leafs.end()) {
+    unique->clauses.push_back(node);
+    return true;
+  }
+  return false;
+}
+
+// Apply a clause to the first directive in given range that allows it.
+// If such a directive does not exist, return false, otherwise return true.
+template <typename C, typename H>
+template <typename Iterator>
+bool ConstructDecompositionT<C, H>::applyToFirst(
+    const ClauseTy *node, llvm::iterator_range<Iterator> range) {
+  if (range.empty())
+    return false;
+
+  for (auto &dwc : range) {
+    if (!llvm::omp::isAllowedClauseForDirective(dwc.id, node->id, version))
+      continue;
+    dwc.clauses.push_back(node);
+    return true;
+  }
+  return false;
+}
+
+// Apply a clause to the innermost directive that allows it. If such a
+// directive does not exist, return false, otherwise return true.
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyToInnermost(const ClauseTy *node) {
+  return applyToFirst(node, llvm::reverse(leafs));
+}
+
+// Apply a clause to the outermost directive that allows it. If such a
+// directive does not exist, return false, otherwise return true.
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyToOutermost(const ClauseTy *node) {
+  return applyToFirst(node, llvm::iterator_range(leafs));
+}
+
+template <typename C, typename H>
+template <typename Predicate>
+bool ConstructDecompositionT<C, H>::applyIf(const ClauseTy *node,
+                                            Predicate shouldApply) {
+  bool applied = false;
+  for (auto &dwc : leafs) {
+    if (!llvm::omp::isAllowedClauseForDirective(dwc.id, node->id, version))
+      continue;
+    if (!shouldApply(dwc))
+      continue;
+    dwc.clauses.push_back(node);
+    applied = true;
+  }
+
+  return applied;
+}
+
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyToAll(const ClauseTy *node) {
+  return applyIf(node, [](auto) { return true; });
+}
+
+template <typename C, typename H>
+template <typename Clause>
+bool ConstructDecompositionT<C, H>::applyClause(Clause &&clause,
+                                                const ClauseTy *node) {
+  // The default behavior is to find the unique directive to which the
+  // given clause may be applied. If there are no such directives, or
+  // if there are multiple ones, flag an error.
+  // From "OpenMP Application Programming Interface", Version 5.2:
+  // S Some clauses are permitted only on a single leaf construct of the
+  // S combined or composite construct, in which case the effect is as if
+  // S the clause is applied to that specific construct. (p339, 31-33)
+  if (applyToUnique(node))
+    return true;
+
+  return false;
+}
+
+// COLLAPSE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::CollapseT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply COLLAPSE to the innermost directive. If it's not one that
+  // allows it flag an error.
+  if (!leafs.empty()) {
+    auto &last = leafs.back();
+
+    if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) {
+      last.clauses.push_back(node);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// PRIVATE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::PrivateT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  return applyToInnermost(node);
+}
+
+// FIRSTPRIVATE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  bool applied = false;
+
+  // S Section 17.2
+  // S The effect of the firstprivate clause is as if it is applied to one
+  // S or more leaf constructs as follows:
+
+  // S - To the distribute construct if it is among the constituent constructs;
+  // S - To the teams construct if it is among the constituent constructs and
+  // S   the distribute construct is not;
+  auto hasDistribute = findDirective(llvm::omp::OMPD_distribute);
+  auto hasTeams = findDirective(llvm::omp::OMPD_teams);
+  if (hasDistribute != nullptr) {
+    hasDistribute->clauses.push_back(node);
+    applied = true;
+    // S If the teams construct is among the constituent constructs and the
+    // S effect is not as if the firstprivate clause is applied to it by the
+    // S above rules, then the effect is as if the shared clause with the
+    // S same list item is applied to the teams construct.
+    if (hasTeams != nullptr) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/clause.v});
+      hasTeams->clauses.push_back(shared);
+    }
+  } else if (hasTeams != nullptr) {
+    hasTeams->clauses.push_back(node);
+    applied = true;
+  }
+
+  // S - To a worksharing construct that accepts the clause if one is among
+  // S   the constituent constructs;
+  auto findWorksharing = [&]() {
+    auto worksharing = getWorksharing();
+    for (auto &dwc : leafs) {
+      auto found = llvm::find(worksharing, dwc.id);
+      if (found != std::end(worksharing))
+        return &dwc;
+    }
+    return static_cast<typename decltype(leafs)::value_type *>(nullptr);
+  };
+
+  auto hasWorksharing = findWorksharing();
+  if (hasWorksharing != nullptr) {
+    hasWorksharing->clauses.push_back(node);
+    applied = true;
+  }
+
+  // S - To the taskloop construct if it is among the constituent constructs;
+  auto hasTaskloop = findDirective(llvm::omp::OMPD_taskloop);
+  if (hasTaskloop != nullptr) {
+    hasTaskloop->clauses.push_back(node);
+    applied = true;
+  }
+
+  // S - To the parallel construct if it is among the constituent constructs
+  // S   and neither a taskloop construct nor a worksharing construct that
+  // S   accepts the clause is among them;
+  auto hasParallel = findDirective(llvm::omp::OMPD_parallel);
+  if (hasParallel != nullptr) {
+    if (hasTaskloop == nullptr && hasWorksharing == nullptr) {
+      hasParallel->clauses.push_back(node);
+      applied = true;
+    } else {
+      // S If the parallel construct is among the constituent constructs and
+      // S the effect is not as if the firstprivate clause is applied to it by
+      // S the above rules, then the effect is as if the shared clause with
+      // S the same list item is applied to the parallel construct.
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/clause.v});
+      hasParallel->clauses.push_back(shared);
+    }
+  }
+
+  // S - To the target construct if it is among the constituent constructs
+  // S   and the same list item neither appears in a lastprivate clause nor
+  // S   is the base variable or base pointer of a list item that appears in
+  // S   a map clause.
+  auto inLastprivate = [&](const ObjectTy &object) {
+    if (ClauseSet *set = findClausesWith(object)) {
+      return llvm::find_if(*set, [](const ClauseTy *c) {
+               return c->id == llvm::omp::Clause::OMPC_lastprivate;
+             }) != set->end();
+    }
+    return false;
+  };
+
+  auto hasTarget = findDirective(llvm::omp::OMPD_target);
+  if (hasTarget != nullptr) {
+    tomp::ObjectListT<IdTy, ExprTy> objects;
+    llvm::copy_if(
+        clause.v, std::back_inserter(objects), [&](const ObjectTy &object) {
+          return !inLastprivate(object) && !mapBases.contains(object.id());
+        });
+    if (!objects.empty()) {
+      auto *firstp = makeClause(
+          llvm::omp::Clause::OMPC_firstprivate,
+          tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy>{/*List=*/objects});
+      hasTarget->clauses.push_back(firstp);
+      applied = true;
+    }
+  }
+
+  return applied;
+}
+
+// LASTPRIVATE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  bool applied = false;
+
+  // S The effect of the lastprivate clause is as if it is applied to all leaf
+  // S constructs that permit the clause.
+  applied = applyToAll(node);
+  if (!applied)
+    return false;
+
+  auto inFirstprivate = [&](const ObjectTy &object) {
+    if (ClauseSet *set = findClausesWith(object)) {
+      return llvm::find_if(*set, [](const ClauseTy *c) {
+               return c->id == llvm::omp::Clause::OMPC_firstprivate;
+             }) != set->end();
+    }
+    return false;
+  };
+
+  auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(clause.t);
+
+  // Prepare list of objects that could end up in a SHARED clause.
+  tomp::ObjectListT<IdTy, ExprTy> sharedObjects;
+  llvm::copy_if(
+      objects, std::back_inserter(sharedObjects),
+      [&](const ObjectTy &object) { return !inFirstprivate(object); });
+
+  if (!sharedObjects.empty()) {
+    // S If the parallel construct is among the constituent constructs and the
+    // S list item is not also specified in the firstprivate clause, then the
+    // S effect of the lastprivate clause is as if the shared clause with the
+    // S same list item is applied to the parallel construct.
+    if (auto hasParallel = findDirective(llvm::omp::OMPD_parallel)) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
+      hasParallel->clauses.push_back(shared);
+      applied = true;
+    }
+
+    // S If the teams construct is among the constituent constructs and the
+    // S list item is not also specified in the firstprivate clause, then the
+    // S effect of the lastprivate clause is as if the shared clause with the
+    // S same list item is applied to the teams construct.
+    if (auto hasTeams = findDirective(llvm::omp::OMPD_teams)) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
+      hasTeams->clauses.push_back(shared);
+      applied = true;
+    }
+  }
+
+  // S If the target construct is among the constituent constructs and the
+  // S list item is not the base variable or base pointer of a list item that
+  // S appears in a map clause, the effect of the lastprivate clause is as if
+  // S the same list item appears in a map clause with a map-type of tofrom.
+  if (auto hasTarget = findDirective(llvm::omp::OMPD_target)) {
+    tomp::ObjectListT<IdTy, ExprTy> tofrom;
+    llvm::copy_if(objects, std::back_inserter(tofrom),
+                  [&](const ObjectTy &object) {
+                    return !mapBases.contains(object.id());
+                  });
+
+    if (!tofrom.empty()) {
+      using MapType =
+          typename tomp::clause::MapT<TypeTy, IdTy, ExprTy>::MapType;
+      auto *map =
+          makeClause(llvm::omp::Clause::OMPC_map,
+                     tomp::clause::MapT<TypeTy, IdTy, ExprTy>{
+                         {/*MapType=*/MapType::Tofrom,
+                          /*MapTypeModifier=*/std::nullopt,
+                          /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt,
+                          /*LocatorList=*/std::move(tofrom)}});
+      hasTarget->clauses.push_back(map);
+      applied = true;
+    }
+  }
+
+  return applied;
+}
+
+// SHARED
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::SharedT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply SHARED to the all leafs that allow it.
+  return applyToAll(node);
+}
+
+// DEFAULT
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::DefaultT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply DEFAULT to the all leafs that allow it.
+  return applyToAll(node);
+}
+
+// THREAD_LIMIT
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::ThreadLimitT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply THREAD_LIMIT to the all leafs that allow it.
+  return applyToAll(node);
+}
+
+// ORDER
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::OrderT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply ORDER to the all leafs that allow it.
+  return applyToAll(node);
+}
+
+// ALLOCATE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::AllocateT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // This one needs to be applied at the end, once we know which clauses are
+  // assigned to which leaf constructs.
+
+  // S The effect of the allocate clause is as if it is applied to all leaf
+  // S constructs that permit the clause and to which a data-sharing attribute
+  // S clause that may create a private copy of the same list item is applied.
+
+  auto canMakePrivateCopy = [](llvm::omp::Clause id) {
+    switch (id) {
+    case llvm::omp::Clause::OMPC_firstprivate:
+    case llvm::omp::Clause::OMPC_lastprivate:
+    case llvm::omp::Clause::OMPC_private:
+      return true;
+    default:
+      return false;
+    }
+  };
+
+  bool applied = applyIf(node, [&](const auto &dwc) {
+    return llvm::any_of(dwc.clauses, [&](const ClauseTy *n) {
+      return canMakePrivateCopy(n->id);
+    });
+  });
+
+  return applied;
+}
+
+// REDUCTION
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::ReductionT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // S The effect of the reduction clause is as if it is applied to all leaf
+  // S constructs that permit the clause, except for the following constructs:
+  // S - The parallel construct, when combined with the sections, worksharing-
+  // S   loop, loop, or taskloop construct; and
+  // S - The teams construct, when combined with the loop construct.
+  bool applyToParallel = true, applyToTeams = true;
+
+  auto hasParallel = findDirective(llvm::omp::Directive::OMPD_parallel);
+  if (hasParallel) {
+    auto exclusions = llvm::concat<const llvm::omp::Directive>(
+        getWorksharingLoop(), llvm::ArrayRef{
+                                  llvm::omp::Directive::OMPD_loop,
+                                  llvm::omp::Directive::OMPD_sections,
+                                  llvm::omp::Directive::OMPD_taskloop,
+                              });
+    auto present = [&](llvm::omp::Directive id) {
+      return findDirective(id) != nullptr;
+    };
+
+    if (llvm::any_of(exclusions, present))
+      applyToParallel = false;
+  }
+
+  auto hasTeams = findDirective(llvm::omp::Directive::OMPD_teams);
+  if (hasTeams) {
+    // The only exclusion is OMPD_loop.
+    if (findDirective(llvm::omp::Directive::OMPD_loop))
+      applyToTeams = false;
+  }
+
+  auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(clause.t);
+
+  tomp::ObjectListT<IdTy, ExprTy> sharedObjects;
+  llvm::transform(objects, std::back_inserter(sharedObjects),
+                  [&](const ObjectTy &object) {
+                    auto maybeBase = helper.getBaseObject(object);
+                    return maybeBase ? *maybeBase : object;
+                  });
+
+  // S For the parallel and teams constructs above, the effect of the
+  // S reduction clause instead is as if each list item or, for any list
+  // S item that is an array item, its corresponding base array or base
+  // S pointer appears in a shared clause for the construct.
+  if (!sharedObjects.empty()) {
+    if (hasParallel && !applyToParallel) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
+      hasParallel->clauses.push_back(shared);
+    }
+    if (hasTeams && !applyToTeams) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
+      hasTeams->clauses.push_back(shared);
+    }
+  }
+
+  // TODO(not implemented in parser yet): Apply the following.
+  // S If the task reduction-modifier is specified, the effect is as if
+  // S it only modifies the behavior of the reduction clause on the innermost
+  // S leaf construct that accepts the modifier (see Section 5.5.8). If the
+  // S inscan reduction-modifier is specified, the effect is as if it modifies
+  // S the behavior of the reduction clause on all constructs of the combined
+  // S construct to which the clause is applied and that accept the modifier.
+
+  bool applied = applyIf(node, [&](auto &dwc) {
+    if (!applyToParallel && &dwc == hasParallel)
+      return false;
+    if (!applyToTeams && &dwc == hasTeams)
+      return false;
+    return true;
+  });
+
+  // S If a list item in a reduction clause on a combined target construct
+  // S does not have the same base variable or base pointer as a list item
+  // S in a map clause on the construct, then the effect is as if the list
+  // S item in the reduction clause appears as a list item in a map clause
+  // S with a map-type of tofrom.
+  auto hasTarget = findDirective(llvm::omp::Directive::OMPD_target);
+  if (hasTarget && leafs.size() > 1) {
+    tomp::ObjectListT<IdTy, ExprTy> tofrom;
+    llvm::copy_if(objects, std::back_inserter(tofrom),
+                  [&](const ObjectTy &object) {
+                    if (auto maybeBase = helper.getBaseObject(object))
+                      return !mapBases.contains(maybeBase->id());
+                    return !mapBases.contains(object.id()); // XXX is this ok?
+                  });
+    if (!tofrom.empty()) {
+      using MapType =
+          typename tomp::clause::MapT<TypeTy, IdTy, ExprTy>::MapType;
+      auto *map = makeClause(
+          llvm::omp::Clause::OMPC_map,
+          tomp::clause::MapT<TypeTy, IdTy, ExprTy>{
+              {/*MapType=*/MapType::Tofrom, /*MapTypeModifier=*/std::nullopt,
+               /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt,
+               /*LocatorList=*/std::move(tofrom)}});
+
+      hasTarget->clauses.push_back(map);
+      applied = true;
+    }
+  }
+
+  return applied;
+}
+
+// IF
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::IfT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  using DirectiveNameModifier =
+      typename clause::IfT<TypeTy, IdTy, ExprTy>::DirectiveNameModifier;
+  auto &modifier = std::get<std::optional<DirectiveNameModifier>>(clause.t);
+
+  if (modifier) {
+    llvm::omp::Directive dirId = *modifier;
+
+    if (auto *hasDir = findDirective(dirId)) {
+      hasDir->clauses.push_back(node);
+      return true;
+    }
+    return false;
+  }
+
+  return applyToAll(node);
+}
+
+// LINEAR
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::LinearT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // S The effect of the linear clause is as if it is applied to the innermost
+  // S leaf construct.
+  if (!applyToInnermost(node))
+    return false;
+
+  // The rest is about SIMD.
+  if (!findDirective(llvm::omp::OMPD_simd))
+    return true;
+
+  // S Additionally, if the list item is not the iteration variable of a
+  // S simd or worksharing-loop SIMD construct, the effect on the outer leaf
+  // S constructs is as if the list item was specified in firstprivate and
+  // S lastprivate clauses on the combined or composite construct, [...]
+  //
+  // S If a list item of the linear clause is the iteration variable of a
+  // S simd or worksharing-loop SIMD construct and it is not declared in
+  // S the construct, the effect on the outer leaf constructs is as if the
+  // S list item was specified in a lastprivate clause on the combined or
+  // S composite construct [...]
+
+  // It's not clear how an object can be listed in a clause AND be the
+  // iteration variable of a construct in which is it declared. If an
+  // object is declared in the construct, then the declaration is located
+  // after the clause listing it.
+
+  std::optional<ObjectTy> iterVar = helper.getLoopIterVar();
+  const auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(clause.t);
+
+  // Lists of objects that will be used to construct FIRSTPRIVATE and
+  // LASTPRIVATE clauses.
+  tomp::ObjectListT<IdTy, ExprTy> first, last;
+
+  for (const ObjectTy &object : objects) {
+    last.push_back(object);
+    if (iterVar && object.id() != iterVar->id())
+      first.push_back(object);
+  }
+
+  if (!first.empty()) {
+    auto *firstp = makeClause(
+        llvm::omp::Clause::OMPC_firstprivate,
+        tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy>{/*List=*/first});
+    nodes.push_back(firstp); // Appending to the main clause list.
+  }
+  if (!last.empty()) {
+    auto *lastp =
+        makeClause(llvm::omp::Clause::OMPC_lastprivate,
+                   tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy>{
+                       {/*LastprivateModifier=*/std::nullopt, /*List=*/last}});
+    nodes.push_back(lastp); // Appending to the main clause list.
+  }
+  return true;
+}
+
+// NOWAIT
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::NowaitT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  return applyToOutermost(node);
+}
+
+template <typename C, typename H> bool ConstructDecompositionT<C, H>::split() {
+  bool success = true;
+
+  for (llvm::omp::Directive leaf :
+       llvm::omp::getLeafConstructsOrSelf(construct))
+    leafs.push_back(LeafReprInternal{leaf, /*clauses=*/{}});
+
+  for (const ClauseTy *node : nodes)
+    addClauseSymsToMap(*node, node);
+
+  // First we need to apply LINEAR, because it can generate additional
+  // FIRSTPRIVATE and LASTPRIVATE clauses that apply to the combined/
+  // composite construct.
+  // Collect them separately, because they may modify the clause list.
+  llvm::SmallVector<const ClauseTy *> linears;
+  for (const ClauseTy *node : nodes) {
+    if (node->id == llvm::omp::Clause::OMPC_linear)
+      linears.push_back(node);
+  }
+  for (const auto *node : linears) {
+    success = success &&
+              applyClause(std::get<tomp::clause::LinearT<TypeTy, IdTy, ExprTy>>(
+                              node->u),
+                          node);
+  }
+
+  // ALLOCATE clauses need to be applied last since they need to see
+  // which directives have data-privatizing clauses.
+  auto skip = [](const ClauseTy *node) {
+    switch (node->id) {
+    case llvm::omp::Clause::OMPC_allocate:
+    case llvm::omp::Clause::OMPC_linear:
+      return true;
+    default:
+      return false;
+    }
+  };
+
+  // Apply (almost) all clauses.
+  for (const ClauseTy *node : nodes) {
+    if (skip(node))
+      continue;
+    success =
+        success &&
+        std::visit([&](auto &&s) { return applyClause(s, node); }, node->u);
+  }
+
+  // Apply ALLOCATE.
+  for (const ClauseTy *node : nodes) {
+    if (node->id != llvm::omp::Clause::OMPC_allocate)
+      continue;
+    success =
+        success &&
+        std::visit([&](auto &&s) { return applyClause(s, node); }, node->u);
+  }
+
+  return success;
+}
+
+} // namespace tomp
+
+#endif // LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H

>From 60201bbf05fd1cbd63c0073cf399dcb7cab1ff99 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Thu, 25 Apr 2024 13:09:46 -0500
Subject: [PATCH 05/15] Add LLVM license header, fix include guard

---
 .../Frontend/OpenMP/ConstructDecompositionT.h | 23 ++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
index f8a18a1bed7ae6..f2a3c0d327fd46 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -1,5 +1,22 @@
-#ifndef LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
-#define LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
+//===- ConstructDecompositionT.h -- Decomposing compound constructs -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Given a compound construct with a set of clauses, generate the list of
+// constituent leaf constructs, each with a list of clauses that apply to it.
+//
+// Note: Clauses that are not originally present, but that are implied by the
+// OpenMP spec are materialized, and are present in the output.
+//
+// Note: Composite constructs will also be broken up into leaf constructs.
+// If composite constructs require processing as a whole, the lists of clauses
+// for each leaf constituent should be concatenated.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_FRONTEND_OPENMP_CONSTRUCTDECOMPOSITIONT_H
+#define LLVM_FRONTEND_OPENMP_CONSTRUCTDECOMPOSITIONT_H
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -982,4 +999,4 @@ template <typename C, typename H> bool ConstructDecompositionT<C, H>::split() {
 
 } // namespace tomp
 
-#endif // LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
+#endif // LLVM_FRONTEND_OPENMP_CONSTRUCTDECOMPOSITIONT_H

>From d5635d667fd2e32eb600a810252b031ad7064fce Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Thu, 25 Apr 2024 14:08:12 -0500
Subject: [PATCH 06/15] Split the directive decomposer into its own file

---
 flang/lib/Lower/CMakeLists.txt        |   1 +
 flang/lib/Lower/OpenMP/Decomposer.cpp | 107 ++++++++++++++++++++++
 flang/lib/Lower/OpenMP/Decomposer.h   |  44 +++++++++
 flang/lib/Lower/OpenMP/OpenMP.cpp     | 126 ++++++--------------------
 4 files changed, 179 insertions(+), 99 deletions(-)
 create mode 100644 flang/lib/Lower/OpenMP/Decomposer.cpp
 create mode 100644 flang/lib/Lower/OpenMP/Decomposer.h

diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt
index f92d1a2bc7de18..1546409752e781 100644
--- a/flang/lib/Lower/CMakeLists.txt
+++ b/flang/lib/Lower/CMakeLists.txt
@@ -27,6 +27,7 @@ add_flang_library(FortranLower
   OpenMP/ClauseProcessor.cpp
   OpenMP/Clauses.cpp
   OpenMP/DataSharingProcessor.cpp
+  OpenMP/Decomposer.cpp
   OpenMP/OpenMP.cpp
   OpenMP/ReductionProcessor.cpp
   OpenMP/Utils.cpp
diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp
new file mode 100644
index 00000000000000..5bc19797e08a1b
--- /dev/null
+++ b/flang/lib/Lower/OpenMP/Decomposer.cpp
@@ -0,0 +1,107 @@
+//===-- Decomposer.cpp -- Compound directive decomposition ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "Decomposer.h"
+
+#include "Clauses.h"
+#include "Utils.h"
+#include "flang/Lower/PFTBuilder.h"
+#include "flang/Semantics/semantics.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Frontend/OpenMP/ClauseT.h"
+#include "llvm/Frontend/OpenMP/ConstructDecompositionT.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <optional>
+#include <utility>
+#include <variant>
+
+using namespace Fortran;
+
+namespace {
+using namespace Fortran::lower::omp;
+
+struct ConstructDecomposition {
+  ConstructDecomposition(mlir::ModuleOp modOp,
+                         semantics::SemanticsContext &semaCtx,
+                         lower::pft::Evaluation &ev,
+                         llvm::omp::Directive compound,
+                         const List<Clause> &clauses)
+      : semaCtx(semaCtx), mod(modOp), eval(ev) {
+    tomp::ConstructDecompositionT decompose(getOpenMPVersion(modOp), *this,
+                                            compound, llvm::ArrayRef(clauses));
+    output = std::move(decompose.output);
+  }
+
+  // Given an object, return its base object if one exists.
+  std::optional<Object> getBaseObject(const Object &object) {
+    return lower::omp::getBaseObject(object, semaCtx);
+  }
+
+  // Return the iteration variable of the associated loop if any.
+  std::optional<Object> getLoopIterVar() {
+    if (semantics::Symbol *symbol = getIterationVariableSymbol(eval))
+      return Object{symbol, /*designator=*/{}};
+    return std::nullopt;
+  }
+
+  semantics::SemanticsContext &semaCtx;
+  mlir::ModuleOp mod;
+  lower::pft::Evaluation &eval;
+  List<UnitConstruct> output;
+};
+} // namespace
+
+namespace Fortran::lower::omp {
+LLVM_DUMP_METHOD llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                               const UnitConstruct &uc) {
+  os << llvm::omp::getOpenMPDirectiveName(uc.id);
+  for (auto [index, clause] : llvm::enumerate(uc.clauses)) {
+    os << (index == 0 ? '\t' : ' ');
+    os << llvm::omp::getOpenMPClauseName(clause.id);
+  }
+  return os;
+}
+
+ConstructQueue buildConstructQueue(
+    mlir::ModuleOp modOp, Fortran::semantics::SemanticsContext &semaCtx,
+    Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive compound,
+    const List<Clause> &clauses) {
+
+  List<UnitConstruct> constructs;
+
+  ConstructDecomposition decompose(modOp, semaCtx, eval, compound, clauses);
+  assert(!decompose.output.empty());
+
+  llvm::SmallVector<llvm::omp::Directive> loweringUnits;
+  std::ignore =
+      llvm::omp::getLeafOrCompositeConstructs(compound, loweringUnits);
+
+  int leafIndex = 0;
+  for (llvm::omp::Directive dir_id : loweringUnits) {
+    constructs.push_back(UnitConstruct{dir_id});
+    UnitConstruct &uc = constructs.back();
+    llvm::ArrayRef<llvm::omp::Directive> leafsOrSelf =
+        llvm::omp::getLeafConstructsOrSelf(dir_id);
+    for (int i = 0, e = leafsOrSelf.size(); i != e; ++i) {
+      uc.clauses.append(decompose.output[leafIndex].clauses);
+      ++leafIndex;
+    }
+  }
+
+  return constructs;
+}
+} // namespace Fortran::lower::omp
diff --git a/flang/lib/Lower/OpenMP/Decomposer.h b/flang/lib/Lower/OpenMP/Decomposer.h
new file mode 100644
index 00000000000000..e2a9e29918fbb8
--- /dev/null
+++ b/flang/lib/Lower/OpenMP/Decomposer.h
@@ -0,0 +1,44 @@
+//===-- Decomposer.h -- Compound directive decomposition ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef FORTRAN_LOWER_OPENMP_DECOMPOSER_H
+#define FORTRAN_LOWER_OPENMP_DECOMPOSER_H
+
+#include "Clauses.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "llvm/Frontend/OpenMP/ConstructDecompositionT.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class raw_ostream;
+}
+
+namespace Fortran {
+namespace semantics {
+class SemanticsContext;
+}
+namespace lower::pft {
+struct Evaluation;
+}
+} // namespace Fortran
+
+namespace Fortran::lower::omp {
+using UnitConstruct = tomp::DirectiveWithClauses<lower::omp::Clause>;
+using ConstructQueue = List<UnitConstruct>;
+
+LLVM_DUMP_METHOD llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                               const UnitConstruct &uc);
+
+ConstructQueue
+buildConstructQueue(mlir::ModuleOp modOp,
+                    Fortran::semantics::SemanticsContext &semaCtx,
+                    Fortran::lower::pft::Evaluation &eval,
+                    llvm::omp::Directive compound, const List<Clause> &clauses);
+} // namespace Fortran::lower::omp
+
+#endif // FORTRAN_LOWER_OPENMP_DECOMPOSER_H
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 4b8afd42f639d5..7e0105e749c39e 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -15,6 +15,7 @@
 #include "ClauseProcessor.h"
 #include "Clauses.h"
 #include "DataSharingProcessor.h"
+#include "Decomposer.h"
 #include "DirectivesCommon.h"
 #include "ReductionProcessor.h"
 #include "Utils.h"
@@ -36,7 +37,6 @@
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Frontend/OpenMP/ConstructDecompositionT.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 
 using namespace Fortran::lower::omp;
@@ -45,6 +45,13 @@ using namespace Fortran::lower::omp;
 // Code generation helper functions
 //===----------------------------------------------------------------------===//
 
+static void genOMPDispatch(Fortran::lower::AbstractConverter &converter,
+                           Fortran::lower::SymMap &symTable,
+                           Fortran::semantics::SemanticsContext &semaCtx,
+                           Fortran::lower::pft::Evaluation &eval,
+                           mlir::Location loc, const ConstructQueue &queue,
+                           ConstructQueue::iterator item);
+
 static Fortran::lower::pft::Evaluation *
 getCollapsedLoopEval(Fortran::lower::pft::Evaluation &eval, int collapseValue) {
   // Return the Evaluation of the innermost collapsed loop, or the current one
@@ -73,89 +80,6 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
     converter.genEval(e);
 }
 
-//===----------------------------------------------------------------------===//
-// Directive decomposition
-//===----------------------------------------------------------------------===//
-
-namespace {
-using DirectiveWithClauses = tomp::DirectiveWithClauses<lower::omp::Clause>;
-using ConstructQueue = List<DirectiveWithClauses>;
-} // namespace
-
-static void genOMPDispatch(Fortran::lower::AbstractConverter &converter,
-                           Fortran::lower::SymMap &symTable,
-                           Fortran::semantics::SemanticsContext &semaCtx,
-                           Fortran::lower::pft::Evaluation &eval,
-                           mlir::Location loc, const ConstructQueue &queue,
-                           ConstructQueue::iterator item);
-
-namespace {
-struct ConstructDecomposition {
-  ConstructDecomposition(mlir::ModuleOp modOp,
-                         semantics::SemanticsContext &semaCtx,
-                         lower::pft::Evaluation &ev,
-                         llvm::omp::Directive construct,
-                         const List<Clause> &clauses)
-      : semaCtx(semaCtx), mod(modOp), eval(ev) {
-    tomp::ConstructDecompositionT decompose(getOpenMPVersion(modOp), *this,
-                                            construct, llvm::ArrayRef(clauses));
-    output = std::move(decompose.output);
-  }
-
-  // Given an object, return its base object if one exists.
-  std::optional<Object> getBaseObject(const Object &object) {
-    return lower::omp::getBaseObject(object, semaCtx);
-  }
-
-  // Return the iteration variable of the associated loop if any.
-  std::optional<Object> getLoopIterVar() {
-    if (semantics::Symbol *symbol = getIterationVariableSymbol(eval))
-      return Object{symbol, /*designator=*/{}};
-    return std::nullopt;
-  }
-
-  semantics::SemanticsContext &semaCtx;
-  mlir::ModuleOp mod;
-  lower::pft::Evaluation &eval;
-  List<DirectiveWithClauses> output;
-};
-} // namespace
-
-LLVM_DUMP_METHOD static llvm::raw_ostream &
-operator<<(llvm::raw_ostream &os, const DirectiveWithClauses &dwc) {
-  os << llvm::omp::getOpenMPDirectiveName(dwc.id);
-  for (auto [index, clause] : llvm::enumerate(dwc.clauses)) {
-    os << (index == 0 ? '\t' : ' ');
-    os << llvm::omp::getOpenMPClauseName(clause.id);
-  }
-  return os;
-}
-
-static void splitCompoundConstruct(
-    mlir::ModuleOp modOp, Fortran::semantics::SemanticsContext &semaCtx,
-    Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive construct,
-    const List<Clause> &clauses, List<DirectiveWithClauses> &directives) {
-
-  ConstructDecomposition decompose(modOp, semaCtx, eval, construct, clauses);
-  assert(!decompose.output.empty());
-
-  llvm::SmallVector<llvm::omp::Directive> loweringUnits;
-  std::ignore =
-      llvm::omp::getLeafOrCompositeConstructs(construct, loweringUnits);
-
-  int leafIndex = 0;
-  for (llvm::omp::Directive dir_id : loweringUnits) {
-    directives.push_back(DirectiveWithClauses{dir_id});
-    DirectiveWithClauses &dwc = directives.back();
-    llvm::ArrayRef<llvm::omp::Directive> leafsOrSelf =
-        llvm::omp::getLeafConstructsOrSelf(dir_id);
-    for (int i = 0, e = leafsOrSelf.size(); i != e; ++i) {
-      dwc.clauses.append(decompose.output[leafIndex].clauses);
-      ++leafIndex;
-    }
-  }
-}
-
 static fir::GlobalOp globalInitialization(
     Fortran::lower::AbstractConverter &converter,
     fir::FirOpBuilder &firOpBuilder, const Fortran::semantics::Symbol &sym,
@@ -2170,7 +2094,9 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
       semaCtx);
   mlir::Location currentLocation = converter.genLocation(directive.source);
 
-  ConstructQueue queue{{DirectiveWithClauses{directive.v, clauses}}};
+  ConstructQueue queue{
+      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
+                          eval, directive.v, clauses)};
 
   switch (directive.v) {
   default:
@@ -2234,7 +2160,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   mlir::Location currentLocation = converter.genLocation(verbatim.source);
 
   ConstructQueue queue{
-      DirectiveWithClauses{llvm::omp::Directive::OMPD_flush, clauses}};
+      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
+                          eval, llvm::omp::Directive::OMPD_flush, clauses)};
   genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects,
              clauses, queue, queue.begin());
 }
@@ -2381,9 +2308,9 @@ genOMP(Fortran::lower::AbstractConverter &converter,
 
   llvm::omp::Directive directive =
       std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v;
-  ConstructQueue queue;
-  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
-                         directive, clauses, queue);
+  ConstructQueue queue{
+      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
+                          eval, directive, clauses)};
   genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                  queue.begin());
 }
@@ -2399,9 +2326,9 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   List<Clause> clauses =
       makeClauses(std::get<Fortran::parser::OmpClauseList>(cd.t), semaCtx);
 
-  ConstructQueue queue;
-  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
-                         llvm::omp::Directive::OMPD_critical, clauses, queue);
+  ConstructQueue queue{
+      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
+                          eval, llvm::omp::Directive::OMPD_critical, clauses)};
 
   const auto &name = std::get<std::optional<Fortran::parser::Name>>(cd.t);
   mlir::Location currentLocation = converter.getCurrentLocation();
@@ -2440,9 +2367,9 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
 
   llvm::omp::Directive directive =
       std::get<parser::OmpLoopDirective>(beginLoopDirective.t).v;
-  ConstructQueue queue;
-  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
-                         directive, clauses, queue);
+  ConstructQueue queue{
+      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
+                          eval, directive, clauses)};
   genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                  queue.begin());
 }
@@ -2455,7 +2382,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
        const Fortran::parser::OpenMPSectionConstruct &sectionConstruct) {
   mlir::Location loc = converter.getCurrentLocation();
   ConstructQueue queue{
-      DirectiveWithClauses{llvm::omp::Directive::OMPD_section}};
+      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
+                          eval, llvm::omp::Directive::OMPD_section, {})};
   genSectionOp(converter, symTable, semaCtx, eval, loc,
                /*clauses=*/{}, queue, queue.begin());
 }
@@ -2480,9 +2408,9 @@ genOMP(Fortran::lower::AbstractConverter &converter,
 
   llvm::omp::Directive directive =
       std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).v;
-  ConstructQueue queue;
-  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
-                         directive, clauses, queue);
+  ConstructQueue queue{
+      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
+                          eval, directive, clauses)};
   genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                  queue.begin());
 }

>From 38ea99f79d7245978b08dae082f0c1999219fe78 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 30 Apr 2024 07:47:57 -0500
Subject: [PATCH 07/15] Expand tuple member into class with TupleTrait

---
 llvm/include/llvm/Frontend/OpenMP/ClauseT.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
index daef02bcfc9a3f..9357e3fb8e5524 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
@@ -1142,9 +1142,11 @@ struct UsesAllocatorsT {
   using MemSpace = E;
   using TraitsArray = ObjectT<I, E>;
   using Allocator = E;
-  using AllocatorSpec =
-      std::tuple<OPT(MemSpace), OPT(TraitsArray), Allocator>; // Not a spec name
-  using Allocators = ListT<AllocatorSpec>;                    // Not a spec name
+  struct AllocatorSpec { // Not a spec name
+    using TupleTrait = std::true_type;
+    std::tuple<OPT(MemSpace), OPT(TraitsArray), Allocator> t;
+  };
+  using Allocators = ListT<AllocatorSpec>; // Not a spec name
   using WrapperTrait = std::true_type;
   Allocators v;
 };

>From 579967ee26d08d4afaf00dd0e8ff38e5cb09a9b1 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 30 Apr 2024 13:02:02 -0500
Subject: [PATCH 08/15] Switch to getOpenMPVersionAttribute, remove old
 functions

---
 flang/lib/Lower/OpenMP/Decomposer.cpp | 6 ++++--
 flang/lib/Lower/OpenMP/Utils.cpp      | 6 ------
 flang/lib/Lower/OpenMP/Utils.h        | 1 -
 3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp
index 5bc19797e08a1b..2d14753f83d4e9 100644
--- a/flang/lib/Lower/OpenMP/Decomposer.cpp
+++ b/flang/lib/Lower/OpenMP/Decomposer.cpp
@@ -16,6 +16,7 @@
 #include "Utils.h"
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Semantics/semantics.h"
+#include "flang/Tools/CrossToolHelpers.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
@@ -41,8 +42,9 @@ struct ConstructDecomposition {
                          llvm::omp::Directive compound,
                          const List<Clause> &clauses)
       : semaCtx(semaCtx), mod(modOp), eval(ev) {
-    tomp::ConstructDecompositionT decompose(getOpenMPVersion(modOp), *this,
-                                            compound, llvm::ArrayRef(clauses));
+    tomp::ConstructDecompositionT decompose(getOpenMPVersionAttribute(modOp),
+                                            *this, compound,
+                                            llvm::ArrayRef(clauses));
     output = std::move(decompose.output);
   }
 
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index c38e0c18cac88c..0e7645859a4f52 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -48,12 +48,6 @@ int64_t getCollapseValue(const List<Clause> &clauses) {
   return 1;
 }
 
-uint32_t getOpenMPVersion(mlir::ModuleOp mod) {
-  if (mlir::Attribute verAttr = mod->getAttr("omp.version"))
-    return llvm::cast<mlir::omp::VersionAttr>(verAttr).getVersion();
-  llvm_unreachable("Expecting OpenMP version attribute in module");
-}
-
 void genObjectList(const ObjectList &objects,
                    Fortran::lower::AbstractConverter &converter,
                    llvm::SmallVectorImpl<mlir::Value> &operands) {
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 5e0ebba23bf358..82c1fb3c8209e5 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -65,7 +65,6 @@ void gatherFuncAndVarSyms(
     llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause);
 
 int64_t getCollapseValue(const List<Clause> &clauses);
-uint32_t getOpenMPVersion(mlir::ModuleOp mod);
 
 Fortran::semantics::Symbol *
 getOmpObjectSymbol(const Fortran::parser::OmpObject &ompObject);

>From 6e46269fc0ae0530c3e85a626e3e2c0804d44bec Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 30 Apr 2024 07:49:40 -0500
Subject: [PATCH 09/15] Add tests, update tests, add clause merging

---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    |   1 +
 flang/lib/Lower/OpenMP/Clauses.cpp            |  24 +
 flang/lib/Lower/OpenMP/Clauses.h              |  16 +-
 flang/lib/Lower/OpenMP/Decomposer.cpp         |  31 +-
 flang/lib/Lower/OpenMP/Decomposer.h           |  12 +-
 flang/lib/Lower/OpenMP/OpenMP.cpp             |  38 +-
 flang/test/Integration/OpenMP/copyprivate.f90 |   8 +-
 .../Lower/OpenMP/allocatable-array-bounds.f90 |  36 +-
 flang/test/Lower/OpenMP/array-bounds.f90      |  10 +-
 flang/test/Lower/OpenMP/copyprivate.f90       |  48 +-
 .../Lower/OpenMP/default-clause-byref.f90     |   9 +-
 flang/test/Lower/OpenMP/default-clause.f90    |   8 +-
 .../parallel-firstprivate-clause-scalar.f90   | 120 +--
 .../parallel-lastprivate-clause-scalar.f90    |   4 +-
 .../OpenMP/parallel-wsloop-firstpriv.f90      |   8 +-
 .../Lower/OpenMP/wsloop-reduction-multi.f90   |  30 +-
 llvm/include/llvm/Frontend/OpenMP/ClauseT.h   |  44 +-
 .../Frontend/OpenMP/ConstructCompositionT.h   | 404 +++++++
 .../Frontend/OpenMP/ConstructDecompositionT.h | 459 +++++---
 llvm/unittests/Frontend/CMakeLists.txt        |   1 +
 .../Frontend/OpenMPDecompositionTest.cpp      | 999 ++++++++++++++++++
 21 files changed, 1987 insertions(+), 323 deletions(-)
 create mode 100644 llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h
 create mode 100644 llvm/unittests/Frontend/OpenMPDecompositionTest.cpp

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index bb83e8d5e78889..5af17f232707f0 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -520,6 +520,7 @@ bool ClauseProcessor::processCopyin() const {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint();
   firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
+
   auto checkAndCopyHostAssociateVar =
       [&](Fortran::semantics::Symbol *sym,
           mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr) {
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index 97337cfc08c72a..7257f9dee77dd5 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -1227,4 +1227,28 @@ List<Clause> makeClauses(const parser::OmpClauseList &clauses,
     return makeClause(s, semaCtx);
   });
 }
+
+bool transferLocations(const List<Clause> &from, List<Clause> &to) {
+  bool allDone = true;
+
+  for (Clause &clause : to) {
+    if (!clause.source.empty())
+      continue;
+    auto found = llvm::find_if(from, [&](const Clause &c) {
+      return c.id == clause.id;
+    });
+    // This is not completely accurate, but should be good enough for now.
+    // It can be improved in the future if necessary, but in cases of
+    // synthesized clauses getting accurate location may be impossible.
+    if (found != from.end()) {
+      clause.source = found->source;
+    } else {
+      // Found a clause that won't have "source".
+      allDone = false;
+    }
+  }
+
+  return allDone;
+}
+
 } // namespace Fortran::lower::omp
diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h
index 3e776425c733e0..61c8fd2d1e5946 100644
--- a/flang/lib/Lower/OpenMP/Clauses.h
+++ b/flang/lib/Lower/OpenMP/Clauses.h
@@ -23,16 +23,19 @@
 
 namespace Fortran::lower::omp {
 using namespace Fortran;
-using SomeType = evaluate::SomeType;
 using SomeExpr = semantics::SomeExpr;
 using MaybeExpr = semantics::MaybeExpr;
 
-using TypeTy = SomeType;
+// evaluate::SomeType doesn't provide == operation. It's not really used in
+// flang's clauses so far, so a trivial implementation is sufficient.
+struct TypeTy : public evaluate::SomeType {
+  bool operator==(const TypeTy &t) const { return true; }
+};
+
 using IdTy = semantics::Symbol *;
 using ExprTy = SomeExpr;
 
-template <typename T>
-using List = tomp::ListT<T>;
+template <typename T> using List = tomp::ListT<T>;
 } // namespace Fortran::lower::omp
 
 namespace tomp::type {
@@ -222,6 +225,8 @@ using When = tomp::clause::WhenT<TypeTy, IdTy, ExprTy>;
 using Write = tomp::clause::WriteT<TypeTy, IdTy, ExprTy>;
 } // namespace clause
 
+using tomp::type::operator==;
+
 struct CancellationConstructType {
   using EmptyTrait = std::true_type;
 };
@@ -244,6 +249,7 @@ using ClauseBase = tomp::ClauseT<TypeTy, IdTy, ExprTy,
                                  MemoryOrder, Threadprivate>;
 
 struct Clause : public ClauseBase {
+  // "source" will be ignored by tomp::type::operator==.
   parser::CharBlock source;
 };
 
@@ -258,6 +264,8 @@ Clause makeClause(const Fortran::parser::OmpClause &cls,
 
 List<Clause> makeClauses(const parser::OmpClauseList &clauses,
                          semantics::SemanticsContext &semaCtx);
+
+bool transferLocations(const List<Clause> &from, List<Clause> &to);
 } // namespace Fortran::lower::omp
 
 #endif // FORTRAN_LOWER_OPENMP_CLAUSES_H
diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp
index 2d14753f83d4e9..5305955de309b5 100644
--- a/flang/lib/Lower/OpenMP/Decomposer.cpp
+++ b/flang/lib/Lower/OpenMP/Decomposer.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Frontend/OpenMP/ClauseT.h"
+#include "llvm/Frontend/OpenMP/ConstructCompositionT.h"
 #include "llvm/Frontend/OpenMP/ConstructDecompositionT.h"
 #include "llvm/Frontend/OpenMP/OMP.h"
 #include "llvm/Support/raw_ostream.h"
@@ -67,6 +68,12 @@ struct ConstructDecomposition {
 };
 } // namespace
 
+static UnitConstruct mergeConstructs(uint32_t version,
+                                     llvm::ArrayRef<UnitConstruct> units) {
+  tomp::ConstructCompositionT compose(version, units);
+  return compose.merged;
+}
+
 namespace Fortran::lower::omp {
 LLVM_DUMP_METHOD llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
                                                const UnitConstruct &uc) {
@@ -80,8 +87,8 @@ LLVM_DUMP_METHOD llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
 
 ConstructQueue buildConstructQueue(
     mlir::ModuleOp modOp, Fortran::semantics::SemanticsContext &semaCtx,
-    Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive compound,
-    const List<Clause> &clauses) {
+    Fortran::lower::pft::Evaluation &eval, const parser::CharBlock &source,
+    llvm::omp::Directive compound, const List<Clause> &clauses) {
 
   List<UnitConstruct> constructs;
 
@@ -91,17 +98,27 @@ ConstructQueue buildConstructQueue(
   llvm::SmallVector<llvm::omp::Directive> loweringUnits;
   std::ignore =
       llvm::omp::getLeafOrCompositeConstructs(compound, loweringUnits);
+  uint32_t version = getOpenMPVersionAttribute(modOp);
 
   int leafIndex = 0;
   for (llvm::omp::Directive dir_id : loweringUnits) {
-    constructs.push_back(UnitConstruct{dir_id});
-    UnitConstruct &uc = constructs.back();
     llvm::ArrayRef<llvm::omp::Directive> leafsOrSelf =
         llvm::omp::getLeafConstructsOrSelf(dir_id);
-    for (int i = 0, e = leafsOrSelf.size(); i != e; ++i) {
-      uc.clauses.append(decompose.output[leafIndex].clauses);
-      ++leafIndex;
+    size_t numLeafs = leafsOrSelf.size();
+
+    llvm::ArrayRef<UnitConstruct> toMerge{&decompose.output[leafIndex],
+                                          numLeafs};
+    auto &uc = constructs.emplace_back(mergeConstructs(version, toMerge));
+
+    if (!transferLocations(clauses, uc.clauses)) {
+      // If some clauses are left without source information, use the
+      // directive's source.
+      for (auto &clause : uc.clauses) {
+        if (clause.source.empty())
+          clause.source = source;
+      }
     }
+    leafIndex += numLeafs;
   }
 
   return constructs;
diff --git a/flang/lib/Lower/OpenMP/Decomposer.h b/flang/lib/Lower/OpenMP/Decomposer.h
index e2a9e29918fbb8..aefc860874cf40 100644
--- a/flang/lib/Lower/OpenMP/Decomposer.h
+++ b/flang/lib/Lower/OpenMP/Decomposer.h
@@ -10,6 +10,7 @@
 
 #include "Clauses.h"
 #include "mlir/IR/BuiltinOps.h"
+#include "llvm/Frontend/OpenMP/ConstructCompositionT.h"
 #include "llvm/Frontend/OpenMP/ConstructDecompositionT.h"
 #include "llvm/Frontend/OpenMP/OMP.h"
 #include "llvm/Support/Compiler.h"
@@ -34,11 +35,12 @@ using ConstructQueue = List<UnitConstruct>;
 LLVM_DUMP_METHOD llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
                                                const UnitConstruct &uc);
 
-ConstructQueue
-buildConstructQueue(mlir::ModuleOp modOp,
-                    Fortran::semantics::SemanticsContext &semaCtx,
-                    Fortran::lower::pft::Evaluation &eval,
-                    llvm::omp::Directive compound, const List<Clause> &clauses);
+ConstructQueue buildConstructQueue(mlir::ModuleOp modOp,
+                                   semantics::SemanticsContext &semaCtx,
+                                   lower::pft::Evaluation &eval,
+                                   const parser::CharBlock &source,
+                                   llvm::omp::Directive compound,
+                                   const List<Clause> &clauses);
 } // namespace Fortran::lower::omp
 
 #endif // FORTRAN_LOWER_OPENMP_DECOMPOSER_H
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 26a44cdf99070d..7b5d407740d7ef 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -546,8 +546,10 @@ struct OpWithBodyGenInfo {
 
 /// Create the body (block) for an OpenMP Operation.
 ///
-/// \param [in]   op - the operation the body belongs to.
-/// \param [in] info - options controlling code-gen for the construction.
+/// \param [in]   op  - the operation the body belongs to.
+/// \param [in] info  - options controlling code-gen for the construction.
+/// \param [in] queue - work queue with nested constructs.
+/// \param [in] item  - item in the queue to generate body for.
 static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
                            const ConstructQueue &queue,
                            ConstructQueue::iterator item) {
@@ -2166,7 +2168,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
 
   ConstructQueue queue{
       buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
-                          eval, directive.v, clauses)};
+                          eval, directive.source, directive.v, clauses)};
 
   switch (directive.v) {
   default:
@@ -2229,9 +2231,9 @@ genOMP(Fortran::lower::AbstractConverter &converter,
                  : List<Clause>{};
   mlir::Location currentLocation = converter.genLocation(verbatim.source);
 
-  ConstructQueue queue{
-      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
-                          eval, llvm::omp::Directive::OMPD_flush, clauses)};
+  ConstructQueue queue{buildConstructQueue(
+      converter.getFirOpBuilder().getModule(), semaCtx, eval, verbatim.source,
+      llvm::omp::Directive::OMPD_flush, clauses)};
   genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects,
              clauses, queue, queue.begin());
 }
@@ -2378,9 +2380,11 @@ genOMP(Fortran::lower::AbstractConverter &converter,
 
   llvm::omp::Directive directive =
       std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v;
+  const parser::CharBlock &source =
+      std::get<parser::OmpBlockDirective>(beginBlockDirective.t).source;
   ConstructQueue queue{
       buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
-                          eval, directive, clauses)};
+                          eval, source, directive, clauses)};
   genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                  queue.begin());
 }
@@ -2396,9 +2400,9 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   List<Clause> clauses =
       makeClauses(std::get<Fortran::parser::OmpClauseList>(cd.t), semaCtx);
 
-  ConstructQueue queue{
-      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
-                          eval, llvm::omp::Directive::OMPD_critical, clauses)};
+  ConstructQueue queue{buildConstructQueue(
+      converter.getFirOpBuilder().getModule(), semaCtx, eval, cd.source,
+      llvm::omp::Directive::OMPD_critical, clauses)};
 
   const auto &name = std::get<std::optional<Fortran::parser::Name>>(cd.t);
   mlir::Location currentLocation = converter.getCurrentLocation();
@@ -2437,9 +2441,11 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
 
   llvm::omp::Directive directive =
       std::get<parser::OmpLoopDirective>(beginLoopDirective.t).v;
+  const parser::CharBlock &source =
+      std::get<parser::OmpLoopDirective>(beginLoopDirective.t).source;
   ConstructQueue queue{
       buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
-                          eval, directive, clauses)};
+                          eval, source, directive, clauses)};
   genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                  queue.begin());
 }
@@ -2451,9 +2457,9 @@ genOMP(Fortran::lower::AbstractConverter &converter,
        Fortran::lower::pft::Evaluation &eval,
        const Fortran::parser::OpenMPSectionConstruct &sectionConstruct) {
   mlir::Location loc = converter.getCurrentLocation();
-  ConstructQueue queue{
-      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
-                          eval, llvm::omp::Directive::OMPD_section, {})};
+  ConstructQueue queue{buildConstructQueue(
+      converter.getFirOpBuilder().getModule(), semaCtx, eval,
+      sectionConstruct.source, llvm::omp::Directive::OMPD_section, {})};
   genSectionOp(converter, symTable, semaCtx, eval, loc,
                /*clauses=*/{}, queue, queue.begin());
 }
@@ -2478,9 +2484,11 @@ genOMP(Fortran::lower::AbstractConverter &converter,
 
   llvm::omp::Directive directive =
       std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).v;
+  const parser::CharBlock &source =
+      std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).source;
   ConstructQueue queue{
       buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
-                          eval, directive, clauses)};
+                          eval, source, directive, clauses)};
   genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                  queue.begin());
 }
diff --git a/flang/test/Integration/OpenMP/copyprivate.f90 b/flang/test/Integration/OpenMP/copyprivate.f90
index d32319a18c28bb..ecb8a8c24648a5 100644
--- a/flang/test/Integration/OpenMP/copyprivate.f90
+++ b/flang/test/Integration/OpenMP/copyprivate.f90
@@ -45,16 +45,16 @@
 !CHECK:       [[OMP_REGION_END]]:
 !CHECK:         %[[THREAD_NUM2:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC:.*]])
 !CHECK:         %[[DID_IT_VAL:.*]] = load i32, ptr %[[DID_IT]]
-!CHECK:         call void @__kmpc_copyprivate(ptr @[[LOC]], i32 %[[THREAD_NUM2]], i64 0, ptr %[[I]], ptr @_copy_i32, i32 %[[DID_IT_VAL]])
+!CHECK:         call void @__kmpc_copyprivate(ptr @[[LOC]], i32 %[[THREAD_NUM2]], i64 0, ptr %[[J]], ptr @_copy_i32, i32 %[[DID_IT_VAL]])
 !CHECK:         %[[THREAD_NUM3:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC]])
 !CHECK:         %[[DID_IT_VAL2:.*]] = load i32, ptr %[[DID_IT]]
-!CHECK:         call void @__kmpc_copyprivate(ptr @[[LOC]], i32 %[[THREAD_NUM3]], i64 0, ptr %[[J]], ptr @_copy_i32, i32 %[[DID_IT_VAL2]])
+!CHECK:         call void @__kmpc_copyprivate(ptr @[[LOC]], i32 %[[THREAD_NUM3]], i64 0, ptr %[[I]], ptr @_copy_i32, i32 %[[DID_IT_VAL2]])
 
 !CHECK:       [[OMP_REGION_BODY]]:
 !CHECK:         br label %[[OMP_SINGLE_REGION:.*]]
 !CHECK:       [[OMP_SINGLE_REGION]]:
-!CHECK:         store i32 11, ptr %[[I]]
-!CHECK:         store i32 22, ptr %[[J]]
+!CHECK:         store i32 11, ptr %[[J]]
+!CHECK:         store i32 22, ptr %[[I]]
 !CHECK:         br label %[[OMP_REGION_CONT3:.*]]
 !CHECK:       [[OMP_REGION_CONT3:.*]]:
 !CHECK:         store i32 1, ptr %[[DID_IT]]
diff --git a/flang/test/Lower/OpenMP/allocatable-array-bounds.f90 b/flang/test/Lower/OpenMP/allocatable-array-bounds.f90
index aeb56a0427e3bb..62c7f1bc98071e 100644
--- a/flang/test/Lower/OpenMP/allocatable-array-bounds.f90
+++ b/flang/test/Lower/OpenMP/allocatable-array-bounds.f90
@@ -8,6 +8,24 @@
 !HOST: %[[ALLOCA_2:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "sp_write", uniq_name = "_QFread_write_sectionEsp_write"}
 !HOST: %[[DECLARE_2:.*]]:2 = hlfir.declare %[[ALLOCA_2]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFread_write_sectionEsp_write"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
 
+!HOST: %[[LOAD_3:.*]] = fir.load %[[DECLARE_2]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!HOST: %[[LOAD_4:.*]] = fir.load %[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!HOST: %[[CONSTANT_5:.*]] = arith.constant 0 : index
+!HOST: %[[BOX_3:.*]]:3 = fir.box_dims %[[LOAD_4]], %[[CONSTANT_5]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+!HOST: %[[CONSTANT_6:.*]] = arith.constant 0 : index
+!HOST: %[[BOX_4:.*]]:3 = fir.box_dims %[[LOAD_3]], %[[CONSTANT_6]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+!HOST: %[[CONSTANT_7:.*]] = arith.constant 2 : index
+!HOST: %[[LB_2:.*]] = arith.subi %[[CONSTANT_7]], %[[BOX_3]]#0 : index
+!HOST: %[[CONSTANT_8:.*]] = arith.constant 5 : index
+!HOST: %[[UB_2:.*]] = arith.subi %[[CONSTANT_8]], %[[BOX_3]]#0 : index
+!HOST: %[[LOAD_5:.*]] = fir.load %[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!HOST: %[[CONSTANT_5:.*]] = arith.constant 0 : index
+!HOST: %[[BOX_5:.*]]:3 = fir.box_dims %[[LOAD_5]], %[[CONSTANT_5]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+!HOST: %[[BOUNDS_2:.*]] = omp.map.bounds lower_bound(%[[LB_2]] : index) upper_bound(%[[UB_2]] : index) extent(%[[BOX_5]]#1 : index) stride(%[[BOX_4]]#2 : index) start_idx(%[[BOX_3]]#0 : index) {stride_in_bytes = true}
+!HOST: %[[VAR_PTR_PTR:.*]] = fir.box_offset %[[DECLARE_2]]#1 base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!HOST: %[[MAP_INFO_MEMBER:.*]] = omp.map.info var_ptr(%[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.array<?xi32>) var_ptr_ptr(%[[VAR_PTR_PTR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
+!HOST: %[[MAP_INFO_2:.*]] = omp.map.info var_ptr(%[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_INFO_MEMBER]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "sp_write(2:5)"}
+
 !HOST: %[[LOAD_1:.*]] = fir.load %[[DECLARE_1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
 !HOST: %[[LOAD_2:.*]] = fir.load %[[DECLARE_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
 !HOST: %[[CONSTANT_1:.*]] = arith.constant 0 : index
@@ -26,24 +44,6 @@
 !HOST: %[[MAP_INFO_MEMBER:.*]] = omp.map.info var_ptr(%[[DECLARE_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.array<?xi32>) var_ptr_ptr(%[[VAR_PTR_PTR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS_1]]) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}    
 !HOST: %[[MAP_INFO_1:.*]] = omp.map.info var_ptr(%[[DECLARE_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_INFO_MEMBER]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "sp_read(2:5)"}
 
-!HOST: %[[LOAD_3:.*]] = fir.load %[[DECLARE_2]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-!HOST: %[[LOAD_4:.*]] = fir.load %[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-!HOST: %[[CONSTANT_5:.*]] = arith.constant 0 : index
-!HOST: %[[BOX_3:.*]]:3 = fir.box_dims %[[LOAD_4]], %[[CONSTANT_5]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-!HOST: %[[CONSTANT_6:.*]] = arith.constant 0 : index
-!HOST: %[[BOX_4:.*]]:3 = fir.box_dims %[[LOAD_3]], %[[CONSTANT_6]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-!HOST: %[[CONSTANT_7:.*]] = arith.constant 2 : index
-!HOST: %[[LB_2:.*]] = arith.subi %[[CONSTANT_7]], %[[BOX_3]]#0 : index
-!HOST: %[[CONSTANT_8:.*]] = arith.constant 5 : index
-!HOST: %[[UB_2:.*]] = arith.subi %[[CONSTANT_8]], %[[BOX_3]]#0 : index
-!HOST: %[[LOAD_5:.*]] = fir.load %[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-!HOST: %[[CONSTANT_5:.*]] = arith.constant 0 : index
-!HOST: %[[BOX_5:.*]]:3 = fir.box_dims %[[LOAD_5]], %[[CONSTANT_5]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-!HOST: %[[BOUNDS_2:.*]] = omp.map.bounds lower_bound(%[[LB_2]] : index) upper_bound(%[[UB_2]] : index) extent(%[[BOX_5]]#1 : index) stride(%[[BOX_4]]#2 : index) start_idx(%[[BOX_3]]#0 : index) {stride_in_bytes = true}
-!HOST: %[[VAR_PTR_PTR:.*]] = fir.box_offset %[[DECLARE_2]]#1 base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
-!HOST: %[[MAP_INFO_MEMBER:.*]] = omp.map.info var_ptr(%[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.array<?xi32>) var_ptr_ptr(%[[VAR_PTR_PTR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}    
-!HOST: %[[MAP_INFO_2:.*]] = omp.map.info var_ptr(%[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_INFO_MEMBER]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "sp_write(2:5)"}
-
 subroutine read_write_section()
     integer, allocatable :: sp_read(:)
     integer, allocatable :: sp_write(:)
diff --git a/flang/test/Lower/OpenMP/array-bounds.f90 b/flang/test/Lower/OpenMP/array-bounds.f90
index 2c8a8999a2cce6..00fa51e90832d0 100644
--- a/flang/test/Lower/OpenMP/array-bounds.f90
+++ b/flang/test/Lower/OpenMP/array-bounds.f90
@@ -14,14 +14,14 @@
 !HOST:  %[[C1:.*]] = arith.constant 1 : index
 !HOST:  %[[C2:.*]] = arith.constant 1 : index
 !HOST:  %[[C3:.*]] = arith.constant 4 : index
-!HOST:  %[[BOUNDS0:.*]] = omp.map.bounds   lower_bound(%[[C2]] : index) upper_bound(%[[C3]] : index) extent(%[[C10]] : index) stride(%[[C1]] : index) start_idx(%[[C1]] : index)
-!HOST:  %[[MAP0:.*]] = omp.map.info var_ptr(%[[READ_DECL]]#0 : !fir.ref<!fir.array<10xi32>>, !fir.array<10xi32>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS0]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_read(2:5)"}
+!HOST:  %[[BOUNDS1:.*]] = omp.map.bounds   lower_bound(%[[C2]] : index) upper_bound(%[[C3]] : index) extent(%[[C10_0]] : index) stride(%[[C1]] : index) start_idx(%[[C1]] : index)
+!HOST:  %[[MAP1:.*]] = omp.map.info var_ptr(%[[WRITE_DECL]]#0 : !fir.ref<!fir.array<10xi32>>, !fir.array<10xi32>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_write(2:5)"}
 !HOST:  %[[C4:.*]] = arith.constant 1 : index
 !HOST:  %[[C5:.*]] = arith.constant 1 : index
 !HOST:  %[[C6:.*]] = arith.constant 4 : index
-!HOST:  %[[BOUNDS1:.*]] = omp.map.bounds   lower_bound(%[[C5]] : index) upper_bound(%[[C6]] : index) extent(%[[C10_0]] : index) stride(%[[C4]] : index) start_idx(%[[C4]] : index)
-!HOST:  %[[MAP1:.*]] = omp.map.info var_ptr(%[[WRITE_DECL]]#0 : !fir.ref<!fir.array<10xi32>>, !fir.array<10xi32>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_write(2:5)"}
-!HOST:  omp.target map_entries(%[[MAP0]] -> %{{.*}}, %[[MAP1]] -> %{{.*}}, {{.*}} -> {{.*}} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>, !fir.ref<i32>) {
+!HOST:  %[[BOUNDS0:.*]] = omp.map.bounds   lower_bound(%[[C5]] : index) upper_bound(%[[C6]] : index) extent(%[[C10]] : index) stride(%[[C4]] : index) start_idx(%[[C4]] : index)
+!HOST:  %[[MAP0:.*]] = omp.map.info var_ptr(%[[READ_DECL]]#0 : !fir.ref<!fir.array<10xi32>>, !fir.array<10xi32>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS0]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_read(2:5)"}
+!HOST:  omp.target map_entries(%[[MAP1]] -> %{{.*}}, %[[MAP0]] -> %{{.*}}, {{.*}} -> {{.*}} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>, !fir.ref<i32>) {
 
 subroutine read_write_section()
     integer :: sp_read(10) = (/1,2,3,4,5,6,7,8,9,10/)
diff --git a/flang/test/Lower/OpenMP/copyprivate.f90 b/flang/test/Lower/OpenMP/copyprivate.f90
index 9b76a996ef3e16..c2d935db171ad8 100644
--- a/flang/test/Lower/OpenMP/copyprivate.f90
+++ b/flang/test/Lower/OpenMP/copyprivate.f90
@@ -60,18 +60,18 @@ subroutine test_tp()
 
 !CHECK-LABEL: func @_QPtest_scalar
 !CHECK:         omp.parallel
-!CHECK:           %[[I1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi2"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
-!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi3"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
-!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEr1"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:           %[[R2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEr2"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
-!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEc1"} : (!fir.ref<!fir.complex<4>>) -> (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<4>>)
-!CHECK:           %[[C2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEc2"} : (!fir.ref<!fir.complex<8>>) -> (!fir.ref<!fir.complex<8>>, !fir.ref<!fir.complex<8>>)
-!CHECK:           %[[L1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEl1"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-!CHECK:           %[[L2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEl2"} : (!fir.ref<!fir.logical<8>>) -> (!fir.ref<!fir.logical<8>>, !fir.ref<!fir.logical<8>>)
-!CHECK:           %[[S1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs1"} : (!fir.ref<!fir.char<1,3>>, index) -> (!fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,3>>)
-!CHECK:           %[[S2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs2"} : (!fir.ref<!fir.char<1,8>>, index) -> (!fir.ref<!fir.char<1,8>>, !fir.ref<!fir.char<1,8>>)
 !CHECK:           %[[S3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs3"} : (!fir.ref<!fir.char<2,8>>, index) -> (!fir.ref<!fir.char<2,8>>, !fir.ref<!fir.char<2,8>>)
+!CHECK:           %[[S2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs2"} : (!fir.ref<!fir.char<1,8>>, index) -> (!fir.ref<!fir.char<1,8>>, !fir.ref<!fir.char<1,8>>)
+!CHECK:           %[[S1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs1"} : (!fir.ref<!fir.char<1,3>>, index) -> (!fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,3>>)
+!CHECK:           %[[L2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEl2"} : (!fir.ref<!fir.logical<8>>) -> (!fir.ref<!fir.logical<8>>, !fir.ref<!fir.logical<8>>)
+!CHECK:           %[[L1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEl1"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+!CHECK:           %[[C2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEc2"} : (!fir.ref<!fir.complex<8>>) -> (!fir.ref<!fir.complex<8>>, !fir.ref<!fir.complex<8>>)
+!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEc1"} : (!fir.ref<!fir.complex<4>>) -> (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<4>>)
+!CHECK:           %[[R2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEr2"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEr1"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi3"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi2"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+!CHECK:           %[[I1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:           omp.single copyprivate(%[[I1]]#0 -> @_copy_i32 : !fir.ref<i32>, %[[I2]]#0 -> @_copy_i64 : !fir.ref<i64>, %[[I3]]#0 -> @_copy_i64 : !fir.ref<i64>, %[[R1]]#0 -> @_copy_f32 : !fir.ref<f32>, %[[R2]]#0 -> @_copy_f64 : !fir.ref<f64>, %[[C1]]#0 -> @_copy_z32 : !fir.ref<!fir.complex<4>>, %[[C2]]#0 -> @_copy_z64 : !fir.ref<!fir.complex<8>>, %[[L1]]#0 -> @_copy_l32 : !fir.ref<!fir.logical<4>>, %[[L2]]#0 -> @_copy_l64 : !fir.ref<!fir.logical<8>>, %[[S1]]#0 -> @_copy_c8x3 : !fir.ref<!fir.char<1,3>>, %[[S2]]#0 -> @_copy_c8x8 : !fir.ref<!fir.char<1,8>>, %[[S3]]#0 -> @_copy_c16x8 : !fir.ref<!fir.char<2,8>>)
 subroutine test_scalar()
   integer(4) :: i1
@@ -94,15 +94,15 @@ subroutine test_scalar()
 
 !CHECK-LABEL: func @_QPtest_array
 !CHECK:         omp.parallel
-!CHECK:           %[[A:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
-!CHECK:           %[[I1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi1"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
-!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi2"} : (!fir.ref<!fir.array<3x4xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4xi32>>, !fir.ref<!fir.array<3x4xi32>>)
-!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi3"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
-!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEr1"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
-!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEc1"} : (!fir.ref<!fir.array<3x4x!fir.complex<4>>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4x!fir.complex<4>>>, !fir.ref<!fir.array<3x4x!fir.complex<4>>>)
-!CHECK:           %[[L1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEl1"} : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.ref<!fir.array<10x!fir.logical<4>>>)
-!CHECK:           %[[S1:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_arrayEs1"} : (!fir.ref<!fir.array<3x!fir.char<1,8>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<1,8>>>, !fir.ref<!fir.array<3x!fir.char<1,8>>>)
 !CHECK:           %[[S2:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_arrayEs2"} : (!fir.ref<!fir.array<3x!fir.char<2,5>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<2,5>>>, !fir.ref<!fir.array<3x!fir.char<2,5>>>)
+!CHECK:           %[[S1:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_arrayEs1"} : (!fir.ref<!fir.array<3x!fir.char<1,8>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<1,8>>>, !fir.ref<!fir.array<3x!fir.char<1,8>>>)
+!CHECK:           %[[L1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEl1"} : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.ref<!fir.array<10x!fir.logical<4>>>)
+!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEc1"} : (!fir.ref<!fir.array<3x4x!fir.complex<4>>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4x!fir.complex<4>>>, !fir.ref<!fir.array<3x4x!fir.complex<4>>>)
+!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEr1"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
+!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi3"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
+!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi2"} : (!fir.ref<!fir.array<3x4xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4xi32>>, !fir.ref<!fir.array<3x4xi32>>)
+!CHECK:           %[[I1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi1"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+!CHECK:           %[[A:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
 !CHECK:           %[[A_REF:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
 !CHECK:           fir.store %[[A]]#0 to %[[A_REF]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
 !CHECK:           %[[I3_REF:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
@@ -142,12 +142,12 @@ subroutine test_dt()
 
 !CHECK-LABEL: func @_QPtest_attr
 !CHECK:         omp.parallel
-!CHECK:           %[[I1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_attrEi1"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
-!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_attrEi2"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
-!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_attrEi3"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
-!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_attrEr1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
-!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_attrEc1"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>)
 !CHECK:           %[[C2:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_attrEc2"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>)
+!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_attrEc1"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>)
+!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_attrEr1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
+!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_attrEi3"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
+!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_attrEi2"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
+!CHECK:           %[[I1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_attrEi1"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
 !CHECK:           omp.single copyprivate(%[[I1]]#0 -> @_copy_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %[[I2:.*]]#0 -> @_copy_box_heap_i32 : !fir.ref<!fir.box<!fir.heap<i32>>>, %[[I3]]#0 -> @_copy_box_ptr_i32 : !fir.ref<!fir.box<!fir.ptr<i32>>>, %[[R1]]#0 -> @_copy_box_ptr_Uxf32 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>,  %[[C1]]#0 -> @_copy_box_heap_Uxc8x5 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>, %[[C2]]#0 -> @_copy_box_ptr_Uxc8x9 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>)
 subroutine test_attr()
   integer, allocatable :: i1(:)
diff --git a/flang/test/Lower/OpenMP/default-clause-byref.f90 b/flang/test/Lower/OpenMP/default-clause-byref.f90
index 62ba67e5962f49..50bacb0d683bc5 100644
--- a/flang/test/Lower/OpenMP/default-clause-byref.f90
+++ b/flang/test/Lower/OpenMP/default-clause-byref.f90
@@ -53,10 +53,10 @@ program default_clause_lowering
     !$omp end parallel
 
 !CHECK: omp.parallel {
-!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
-!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
 !CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
 !CHECK: omp.terminator
@@ -161,12 +161,12 @@ subroutine nested_default_clause_tests
 !CHECK: %[[Z:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFnested_default_clause_testsEz"}
 !CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z]] {uniq_name = "_QFnested_default_clause_testsEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: omp.parallel   {
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
 !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
-!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
-!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFnested_default_clause_testsEz"}
 !CHECK: %[[PRIVATE_Z_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Z]] {uniq_name = "_QFnested_default_clause_testsEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[PRIVATE_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFnested_default_clause_testsEk"}
@@ -221,6 +221,7 @@ subroutine nested_default_clause_tests
     
     
 !CHECK: omp.parallel {
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
 !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
 !CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
diff --git a/flang/test/Lower/OpenMP/default-clause.f90 b/flang/test/Lower/OpenMP/default-clause.f90
index a90f0f4ef5f841..d690a49093ffc6 100644
--- a/flang/test/Lower/OpenMP/default-clause.f90
+++ b/flang/test/Lower/OpenMP/default-clause.f90
@@ -53,10 +53,10 @@ program default_clause_lowering
     !$omp end parallel
 
 !CHECK: omp.parallel {
-!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
-!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
 !CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
+!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
 !CHECK: omp.terminator
@@ -160,12 +160,12 @@ end program default_clause_lowering
 !CHECK: %[[Z:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFnested_default_clause_test1Ez"}
 !CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z]] {uniq_name = "_QFnested_default_clause_test1Ez"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: omp.parallel   {
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_test1Ey"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_test1Ey"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_test1Ex"}
 !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_test1Ex"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
-!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_test1Ey"}
-!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_test1Ey"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFnested_default_clause_test1Ez"}
 !CHECK: %[[PRIVATE_Z_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Z]] {uniq_name = "_QFnested_default_clause_test1Ez"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[PRIVATE_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFnested_default_clause_test1Ek"}
diff --git a/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90
index 6402f98a2addc4..0741d3d28feba3 100644
--- a/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90
+++ b/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90
@@ -7,14 +7,14 @@
 !CHECK:    %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFfirstprivate_complexEarg1"} : (!fir.ref<!fir.complex<4>>) -> (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<4>>)
 !CHECK:    %[[ARG2_DECL:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFfirstprivate_complexEarg2"} : (!fir.ref<!fir.complex<8>>) -> (!fir.ref<!fir.complex<8>>, !fir.ref<!fir.complex<8>>)
 !CHECK:   omp.parallel {
-!CHECK:     %[[ARG1_PVT:.*]] = fir.alloca !fir.complex<4> {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_complexEarg1"}
-!CHECK:     %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_complexEarg1"} : (!fir.ref<!fir.complex<4>>) -> (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<4>>)
-!CHECK:     %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<!fir.complex<4>>
-!CHECK:      hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : !fir.complex<4>, !fir.ref<!fir.complex<4>>
 !CHECK:     %[[ARG2_PVT:.*]] = fir.alloca !fir.complex<8> {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_complexEarg2"}
 !CHECK:     %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_complexEarg2"} : (!fir.ref<!fir.complex<8>>) -> (!fir.ref<!fir.complex<8>>, !fir.ref<!fir.complex<8>>)
 !CHECK:     %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<!fir.complex<8>>
 !CHECK:     hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : !fir.complex<8>, !fir.ref<!fir.complex<8>>
+!CHECK:     %[[ARG1_PVT:.*]] = fir.alloca !fir.complex<4> {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_complexEarg1"}
+!CHECK:     %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_complexEarg1"} : (!fir.ref<!fir.complex<4>>) -> (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<4>>)
+!CHECK:     %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<!fir.complex<4>>
+!CHECK:      hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : !fir.complex<4>, !fir.ref<!fir.complex<4>>
 !CHECK:     fir.call @_QPfoo(%[[ARG1_PVT_DECL]]#1, %[[ARG2_PVT_DECL]]#1) {{.*}}: (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<8>>) -> ()
 !CHECK:     omp.terminator
 !CHECK:   }
@@ -37,30 +37,30 @@ subroutine firstprivate_complex(arg1, arg2)
 !CHECK:  %[[ARG5_DECL:.*]]:2 = hlfir.declare %[[ARG5]] {uniq_name = "_QFfirstprivate_integerEarg5"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
 !CHECK:  %[[ARG6_DECL:.*]]:2 = hlfir.declare %[[ARG6]] {uniq_name = "_QFfirstprivate_integerEarg6"} : (!fir.ref<i128>) -> (!fir.ref<i128>, !fir.ref<i128>)
 !CHECK:  omp.parallel {
-!CHECK:    %[[ARG1_PVT:.*]] = fir.alloca i32 {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_integerEarg1"}
-!CHECK:    %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_integerEarg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:    %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<i32>
-!CHECK:    hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
-!CHECK:    %[[ARG2_PVT:.*]] = fir.alloca i8 {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_integerEarg2"}
-!CHECK:    %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_integerEarg2"} : (!fir.ref<i8>) -> (!fir.ref<i8>, !fir.ref<i8>)
-!CHECK:    %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<i8>
-!CHECK:    hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : i8, !fir.ref<i8>
-!CHECK:    %[[ARG3_PVT:.*]] = fir.alloca i16 {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_integerEarg3"}
-!CHECK:    %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_integerEarg3"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>)
-!CHECK:    %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref<i16>
-!CHECK:    hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : i16, !fir.ref<i16>
-!CHECK:    %[[ARG4_PVT:.*]] = fir.alloca i32 {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_integerEarg4"}
-!CHECK:    %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_integerEarg4"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:    %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref<i32>
-!CHECK:    hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
-!CHECK:    %[[ARG5_PVT:.*]] = fir.alloca i64 {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_integerEarg5"}
-!CHECK:    %[[ARG5_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG5_PVT]] {uniq_name = "_QFfirstprivate_integerEarg5"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
-!CHECK:    %[[ARG5_VAL:.*]] = fir.load %[[ARG5_DECL]]#0 : !fir.ref<i64>
-!CHECK:    hlfir.assign %[[ARG5_VAL]] to %[[ARG5_PVT_DECL]]#0 temporary_lhs : i64, !fir.ref<i64>
 !CHECK:    %[[ARG6_PVT:.*]] = fir.alloca i128 {bindc_name = "arg6", pinned, uniq_name = "_QFfirstprivate_integerEarg6"}
 !CHECK:    %[[ARG6_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG6_PVT]] {uniq_name = "_QFfirstprivate_integerEarg6"} : (!fir.ref<i128>) -> (!fir.ref<i128>, !fir.ref<i128>)
 !CHECK:    %[[ARG6_VAL:.*]] = fir.load %[[ARG6_DECL]]#0 : !fir.ref<i128>
 !CHECK:    hlfir.assign %[[ARG6_VAL]] to %[[ARG6_PVT_DECL]]#0 temporary_lhs : i128, !fir.ref<i128>
+!CHECK:    %[[ARG5_PVT:.*]] = fir.alloca i64 {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_integerEarg5"}
+!CHECK:    %[[ARG5_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG5_PVT]] {uniq_name = "_QFfirstprivate_integerEarg5"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+!CHECK:    %[[ARG5_VAL:.*]] = fir.load %[[ARG5_DECL]]#0 : !fir.ref<i64>
+!CHECK:    hlfir.assign %[[ARG5_VAL]] to %[[ARG5_PVT_DECL]]#0 temporary_lhs : i64, !fir.ref<i64>
+!CHECK:    %[[ARG4_PVT:.*]] = fir.alloca i32 {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_integerEarg4"}
+!CHECK:    %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_integerEarg4"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:    %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref<i32>
+!CHECK:    hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK:    %[[ARG3_PVT:.*]] = fir.alloca i16 {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_integerEarg3"}
+!CHECK:    %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_integerEarg3"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>)
+!CHECK:    %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref<i16>
+!CHECK:    hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : i16, !fir.ref<i16>
+!CHECK:    %[[ARG2_PVT:.*]] = fir.alloca i8 {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_integerEarg2"}
+!CHECK:    %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_integerEarg2"} : (!fir.ref<i8>) -> (!fir.ref<i8>, !fir.ref<i8>)
+!CHECK:    %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<i8>
+!CHECK:    hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : i8, !fir.ref<i8>
+!CHECK:    %[[ARG1_PVT:.*]] = fir.alloca i32 {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_integerEarg1"}
+!CHECK:    %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_integerEarg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:    %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<i32>
+!CHECK:    hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
 !CHECK:    fir.call @_QPbar(%[[ARG1_PVT_DECL]]#1, %[[ARG2_PVT_DECL]]#1, %[[ARG3_PVT_DECL]]#1, %[[ARG4_PVT_DECL]]#1,
 !%[[ARG5_PVT_DECL]]#1, %[[ARG6_PVT_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i8>, !fir.ref<i16>, !fir.ref<i32>, !fir.ref<i64>, !fir.ref<i128>) -> ()
 !CHECK:    omp.terminator
@@ -87,26 +87,26 @@ subroutine firstprivate_integer(arg1, arg2, arg3, arg4, arg5, arg6)
 !CHECK:    %[[ARG4_DECL:.*]]:2 = hlfir.declare %[[ARG4]] {uniq_name = "_QFfirstprivate_logicalEarg4"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 !CHECK:    %[[ARG5_DECL:.*]]:2 = hlfir.declare %[[ARG5]] {uniq_name = "_QFfirstprivate_logicalEarg5"} : (!fir.ref<!fir.logical<8>>) -> (!fir.ref<!fir.logical<8>>, !fir.ref<!fir.logical<8>>)
 !CHECK:   omp.parallel {
-!CHECK:     %[[ARG1_PVT:.*]] = fir.alloca !fir.logical<4> {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_logicalEarg1"}
-!CHECK:     %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg1"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-!CHECK:     %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<!fir.logical<4>>
-!CHECK:     hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-!CHECK:     %[[ARG2_PVT:.*]] = fir.alloca !fir.logical<1> {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_logicalEarg2"}
-!CHECK:     %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg2"} : (!fir.ref<!fir.logical<1>>) -> (!fir.ref<!fir.logical<1>>, !fir.ref<!fir.logical<1>>)
-!CHECK:     %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<!fir.logical<1>>
-!CHECK:     hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : !fir.logical<1>, !fir.ref<!fir.logical<1>>
-!CHECK:     %[[ARG3_PVT:.*]] = fir.alloca !fir.logical<2> {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_logicalEarg3"}
-!CHECK:     %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg3"} : (!fir.ref<!fir.logical<2>>) -> (!fir.ref<!fir.logical<2>>, !fir.ref<!fir.logical<2>>)
-!CHECK:     %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref<!fir.logical<2>>
-!CHECK:     hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : !fir.logical<2>, !fir.ref<!fir.logical<2>>
-!CHECK:     %[[ARG4_PVT:.*]] = fir.alloca !fir.logical<4> {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_logicalEarg4"}
-!CHECK:     %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg4"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-!CHECK:     %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref<!fir.logical<4>>
-!CHECK:     hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 !CHECK:     %[[ARG5_PVT:.*]] = fir.alloca !fir.logical<8> {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_logicalEarg5"}
 !CHECK:     %[[ARG5_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG5_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg5"} : (!fir.ref<!fir.logical<8>>) -> (!fir.ref<!fir.logical<8>>, !fir.ref<!fir.logical<8>>)
 !CHECK:     %[[ARG5_VAL:.*]] = fir.load %[[ARG5_DECL]]#0 : !fir.ref<!fir.logical<8>>
 !CHECK:     hlfir.assign %[[ARG5_VAL]] to %[[ARG5_PVT_DECL]]#0 temporary_lhs : !fir.logical<8>, !fir.ref<!fir.logical<8>>
+!CHECK:     %[[ARG4_PVT:.*]] = fir.alloca !fir.logical<4> {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_logicalEarg4"}
+!CHECK:     %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg4"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+!CHECK:     %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref<!fir.logical<4>>
+!CHECK:     hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+!CHECK:     %[[ARG3_PVT:.*]] = fir.alloca !fir.logical<2> {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_logicalEarg3"}
+!CHECK:     %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg3"} : (!fir.ref<!fir.logical<2>>) -> (!fir.ref<!fir.logical<2>>, !fir.ref<!fir.logical<2>>)
+!CHECK:     %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref<!fir.logical<2>>
+!CHECK:     hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : !fir.logical<2>, !fir.ref<!fir.logical<2>>
+!CHECK:     %[[ARG2_PVT:.*]] = fir.alloca !fir.logical<1> {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_logicalEarg2"}
+!CHECK:     %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg2"} : (!fir.ref<!fir.logical<1>>) -> (!fir.ref<!fir.logical<1>>, !fir.ref<!fir.logical<1>>)
+!CHECK:     %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<!fir.logical<1>>
+!CHECK:     hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : !fir.logical<1>, !fir.ref<!fir.logical<1>>
+!CHECK:     %[[ARG1_PVT:.*]] = fir.alloca !fir.logical<4> {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_logicalEarg1"}
+!CHECK:     %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg1"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+!CHECK:     %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<!fir.logical<4>>
+!CHECK:     hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 !CHECK:     fir.call @_QPbaz(%[[ARG1_PVT_DECL]]#1, %[[ARG2_PVT_DECL]]#1, %[[ARG3_PVT_DECL]]#1, %[[ARG4_PVT_DECL]]#1, %[[ARG5_PVT_DECL]]#1) {{.*}}: (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<1>>, !fir.ref<!fir.logical<2>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<8>>) -> ()
 !CHECK:     omp.terminator
 !CHECK:   }
@@ -132,30 +132,30 @@ subroutine firstprivate_logical(arg1, arg2, arg3, arg4, arg5)
 !CHECK:   %[[ARG5_DECL:.*]]:2 = hlfir.declare %[[ARG5]] {uniq_name = "_QFfirstprivate_realEarg5"} : (!fir.ref<f80>) -> (!fir.ref<f80>, !fir.ref<f80>)
 !CHECK:   %[[ARG6_DECL:.*]]:2 = hlfir.declare %[[ARG6]] {uniq_name = "_QFfirstprivate_realEarg6"} : (!fir.ref<f128>) -> (!fir.ref<f128>, !fir.ref<f128>)
 !CHECK:   omp.parallel {
-!CHECK:     %[[ARG1_PVT:.*]] = fir.alloca f32 {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_realEarg1"}
-!CHECK:     %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_realEarg1"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:     %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<f32>
-!CHECK:     hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
-!CHECK:     %[[ARG2_PVT:.*]] = fir.alloca f16 {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_realEarg2"}
-!CHECK:     %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_realEarg2"} : (!fir.ref<f16>) -> (!fir.ref<f16>, !fir.ref<f16>)
-!CHECK:     %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<f16>
-!CHECK:     hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : f16, !fir.ref<f16>
-!CHECK:     %[[ARG3_PVT:.*]] = fir.alloca f32 {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_realEarg3"}
-!CHECK:     %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_realEarg3"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:     %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref<f32>
-!CHECK:     hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
-!CHECK:     %[[ARG4_PVT:.*]] = fir.alloca f64 {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_realEarg4"}
-!CHECK:     %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_realEarg4"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
-!CHECK:     %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref<f64>
-!CHECK:     hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : f64, !fir.ref<f64>
-!CHECK:     %[[ARG5_PVT:.*]] = fir.alloca f80 {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_realEarg5"}
-!CHECK:     %[[ARG5_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG5_PVT]] {uniq_name = "_QFfirstprivate_realEarg5"} : (!fir.ref<f80>) -> (!fir.ref<f80>, !fir.ref<f80>)
-!CHECK:     %[[ARG5_VAL:.*]] = fir.load %[[ARG5_DECL]]#0 : !fir.ref<f80>
-!CHECK:     hlfir.assign %[[ARG5_VAL]] to %[[ARG5_PVT_DECL]]#0 temporary_lhs : f80, !fir.ref<f80>
 !CHECK:     %[[ARG6_PVT:.*]] = fir.alloca f128 {bindc_name = "arg6", pinned, uniq_name = "_QFfirstprivate_realEarg6"}
 !CHECK:     %[[ARG6_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG6_PVT]] {uniq_name = "_QFfirstprivate_realEarg6"} : (!fir.ref<f128>) -> (!fir.ref<f128>, !fir.ref<f128>)
 !CHECK:     %[[ARG6_VAL:.*]] = fir.load %[[ARG6_DECL]]#0 : !fir.ref<f128>
 !CHECK:     hlfir.assign %[[ARG6_VAL]] to %[[ARG6_PVT_DECL]]#0 temporary_lhs : f128, !fir.ref<f128>
+!CHECK:     %[[ARG5_PVT:.*]] = fir.alloca f80 {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_realEarg5"}
+!CHECK:     %[[ARG5_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG5_PVT]] {uniq_name = "_QFfirstprivate_realEarg5"} : (!fir.ref<f80>) -> (!fir.ref<f80>, !fir.ref<f80>)
+!CHECK:     %[[ARG5_VAL:.*]] = fir.load %[[ARG5_DECL]]#0 : !fir.ref<f80>
+!CHECK:     hlfir.assign %[[ARG5_VAL]] to %[[ARG5_PVT_DECL]]#0 temporary_lhs : f80, !fir.ref<f80>
+!CHECK:     %[[ARG4_PVT:.*]] = fir.alloca f64 {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_realEarg4"}
+!CHECK:     %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_realEarg4"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+!CHECK:     %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref<f64>
+!CHECK:     hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : f64, !fir.ref<f64>
+!CHECK:     %[[ARG3_PVT:.*]] = fir.alloca f32 {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_realEarg3"}
+!CHECK:     %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_realEarg3"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:     %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref<f32>
+!CHECK:     hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
+!CHECK:     %[[ARG2_PVT:.*]] = fir.alloca f16 {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_realEarg2"}
+!CHECK:     %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_realEarg2"} : (!fir.ref<f16>) -> (!fir.ref<f16>, !fir.ref<f16>)
+!CHECK:     %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<f16>
+!CHECK:     hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : f16, !fir.ref<f16>
+!CHECK:     %[[ARG1_PVT:.*]] = fir.alloca f32 {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_realEarg1"}
+!CHECK:     %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_realEarg1"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:     %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<f32>
+!CHECK:     hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
 !CHECK:     fir.call @_QPqux(%[[ARG1_PVT_DECL]]#1, %[[ARG2_PVT_DECL]]#1, %[[ARG3_PVT_DECL]]#1, %[[ARG4_PVT_DECL]]#1, %[[ARG5_PVT_DECL]]#1, %[[ARG6_PVT_DECL]]#1) {{.*}}: (!fir.ref<f32>, !fir.ref<f16>, !fir.ref<f32>, !fir.ref<f64>, !fir.ref<f80>, !fir.ref<f128>) -> ()
 !CHECK:     omp.terminator
 !CHECK:   }
diff --git a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90
index bb81e5eac62f56..9dba50758f8217 100644
--- a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90
+++ b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90
@@ -145,10 +145,10 @@ subroutine mult_lastprivate_int(arg1, arg2)
 !CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFmult_lastprivate_int2Earg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[ARG2_DECL:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFmult_lastprivate_int2Earg2"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: omp.parallel  {
-!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
-!CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_int2Earg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
 !CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_int2Earg2"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
+!CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_int2Earg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: omp.wsloop {
 !CHECK-NEXT: omp.loop_nest (%[[INDX_WS:.*]]) : {{.*}} {
 
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90
index ac8b9f50f54e67..474fab379cd490 100644
--- a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90
+++ b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90
@@ -47,14 +47,14 @@ subroutine omp_do_firstprivate2(a, n)
   ! CHECK:  omp.parallel {
   ! CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
   ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_firstprivate2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned
-  ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_firstprivate2Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK: %[[LD:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
-  ! CHECK: hlfir.assign %[[LD]] to %[[A_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
   ! CHECK: %[[N_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "n", pinned, uniq_name = "_QFomp_do_firstprivate2En"}
   ! CHECK: %[[N_PVT_DECL:.*]]:2 = hlfir.declare %[[N_PVT_REF]] {uniq_name = "_QFomp_do_firstprivate2En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! CHECK: %[[LD1:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<i32>
   ! CHECK: hlfir.assign %[[LD1]] to %[[N_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+  ! CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned
+  ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_firstprivate2Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! CHECK: %[[LD:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
+  ! CHECK: hlfir.assign %[[LD]] to %[[A_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
 
 
   ! CHECK: %[[LB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90
index 429253efdc8090..c2be161b83e2b2 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90
@@ -1,17 +1,6 @@
 ! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
 
-!CHECK-LABEL: omp.declare_reduction
-!CHECK-SAME: @[[MIN_RED_I32_NAME:.*]] : i32 init {
-!CHECK: ^bb0(%{{.*}}: i32):
-!CHECK:  %[[C0_1:.*]] = arith.constant 2147483647 : i32
-!CHECK:  omp.yield(%[[C0_1]] : i32)
-!CHECK: } combiner {
-!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
-!CHECK:  %[[RES:.*]] = arith.minsi %[[ARG0]], %[[ARG1]] : i32
-!CHECK:  omp.yield(%[[RES]] : i32)
-!CHECK: }
-
 !CHECK-LABEL: omp.declare_reduction
 !CHECK-SAME: @[[ADD_RED_F32_NAME:.*]] : f32 init {
 !CHECK: ^bb0(%{{.*}}: f32):
@@ -34,6 +23,17 @@
 !CHECK:  omp.yield(%[[RES]] : i32)
 !CHECK: }
 
+!CHECK-LABEL: omp.declare_reduction
+!CHECK-SAME: @[[MIN_RED_I32_NAME:.*]] : i32 init {
+!CHECK: ^bb0(%{{.*}}: i32):
+!CHECK:  %[[C0_1:.*]] = arith.constant 2147483647 : i32
+!CHECK:  omp.yield(%[[C0_1]] : i32)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
+!CHECK:  %[[RES:.*]] = arith.minsi %[[ARG0]], %[[ARG1]] : i32
+!CHECK:  omp.yield(%[[RES]] : i32)
+!CHECK: }
+
 !CHECK-LABEL: func.func @_QPmultiple_reduction
 !CHECK:      %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductionEx"}
 !CHECK:      %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFmultiple_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -42,13 +42,13 @@
 !CHECK:      %[[Z_REF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_reductionEz"}
 !CHECK:      %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z_REF]] {uniq_name = "_QFmultiple_reductionEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:      omp.wsloop reduction(
-!CHECK-SAME: @[[ADD_RED_I32_NAME]] %[[X_DECL]]#0 -> %[[PRV_X:.+]] : !fir.ref<i32>,
-!CHECK-SAME: @[[ADD_RED_F32_NAME]] %[[Y_DECL]]#0 -> %[[PRV_Y:.+]] : !fir.ref<f32>,
-!CHECK-SAME: @[[MIN_RED_I32_NAME]] %[[Z_DECL]]#0 -> %[[PRV_Z:.+]] : !fir.ref<i32>) {
+!CHECK-SAME: @[[MIN_RED_I32_NAME]] %[[Z_DECL]]#0 -> %[[PRV_Z:[a-z0-9]+]] : !fir.ref<i32>,
+!CHECK-SAME: @[[ADD_RED_I32_NAME]] %[[X_DECL]]#0 -> %[[PRV_X:[a-z0-9]+]] : !fir.ref<i32>,
+!CHECK-SAME: @[[ADD_RED_F32_NAME]] %[[Y_DECL]]#0 -> %[[PRV_Y:[a-z0-9]+]] : !fir.ref<f32>) {
 !CHECK-NEXT:   omp.loop_nest {{.*}} {
+!CHECK:          %[[PRV_Z_DECL:.+]]:2 = hlfir.declare %[[PRV_Z]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:          %[[PRV_X_DECL:.+]]:2 = hlfir.declare %[[PRV_X]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:          %[[PRV_Y_DECL:.+]]:2 = hlfir.declare %[[PRV_Y]] {{.*}} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:          %[[PRV_Z_DECL:.+]]:2 = hlfir.declare %[[PRV_Z]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:          %[[LPRV_X:.+]] = fir.load %[[PRV_X_DECL]]#0 : !fir.ref<i32>
 !CHECK:          %[[RES_X:.+]] = arith.addi %[[LPRV_X]], %{{.+}} : i32
 !CHECK:          hlfir.assign %[[RES_X]] to %[[PRV_X_DECL]]#0 : i32, !fir.ref<i32>
diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
index 9357e3fb8e5524..07c95497b7a419 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
@@ -178,6 +178,12 @@ template <typename T> using ListT = llvm::SmallVector<T, 0>;
 // provide their own specialization that conforms to the above requirements.
 template <typename IdType, typename ExprType> struct ObjectT;
 
+// By default, object equality is only determined by its identity.
+template <typename I, typename E>
+bool operator==(const ObjectT<I, E> &o1, const ObjectT<I, E> &o2) {
+  return o1.id() == o2.id();
+}
+
 template <typename I, typename E> using ObjectListT = ListT<ObjectT<I, E>>;
 
 using DirectiveName = llvm::omp::Directive;
@@ -264,6 +270,32 @@ struct ReductionIdentifierT {
 
 template <typename T, typename I, typename E> //
 using IteratorT = ListT<IteratorSpecifierT<T, I, E>>;
+
+template <typename T>
+std::enable_if_t<T::EmptyTrait::value, bool> operator==(const T &a,
+                                                        const T &b) {
+  return true;
+}
+template <typename T>
+std::enable_if_t<T::IncompleteTrait::value, bool> operator==(const T &a,
+                                                             const T &b) {
+  return true;
+}
+template <typename T>
+std::enable_if_t<T::WrapperTrait::value, bool> operator==(const T &a,
+                                                          const T &b) {
+  return a.v == b.v;
+}
+template <typename T>
+std::enable_if_t<T::TupleTrait::value, bool> operator==(const T &a,
+                                                        const T &b) {
+  return a.t == b.t;
+}
+template <typename T>
+std::enable_if_t<T::UnionTrait::value, bool> operator==(const T &a,
+                                                        const T &b) {
+  return a.u == b.u;
+}
 } // namespace type
 
 template <typename T> using ListT = type::ListT<T>;
@@ -285,6 +317,8 @@ ListT<ResultTy> makeList(ContainerTy &&container, FunctionTy &&func) {
 }
 
 namespace clause {
+using type::operator==;
+
 // V5.2: [8.3.1] `assumption` clauses
 template <typename T, typename I, typename E> //
 struct AbsentT {
@@ -726,7 +760,7 @@ struct LinearT {
   ENUM(LinearModifier, Ref, Val, Uval);
 
   using TupleTrait = std::true_type;
-  // Step == nullptr means 1.
+  // Step == nullopt means 1.
   std::tuple<OPT(StepSimpleModifier), OPT(StepComplexModifier),
              OPT(LinearModifier), List>
       t;
@@ -1234,9 +1268,10 @@ using UnionOfAllClausesT = typename type::Union< //
     UnionClausesT<T, I, E>,                      //
     WrapperClausesT<T, I, E>                     //
     >::type;
-
 } // namespace clause
 
+using type::operator==;
+
 // The variant wrapper that encapsulates all possible specific clauses.
 // The `Extras` arguments are additional types representing local extensions
 // to the clause set, e.g.
@@ -1262,6 +1297,11 @@ struct ClauseT {
   VariantTy u;
 };
 
+template <typename ClauseType> struct DirectiveWithClauses {
+  llvm::omp::Directive id = llvm::omp::Directive::OMPD_unknown;
+  tomp::type::ListT<ClauseType> clauses;
+};
+
 } // namespace tomp
 
 #undef OPT
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h
new file mode 100644
index 00000000000000..f7123a8ec43017
--- /dev/null
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h
@@ -0,0 +1,404 @@
+//===- ConstructCompositionT.h -- Composing compound constructs -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Given a list of leaf construct, each with a set of clauses, generate the
+// compound construct whose leaf constructs are the given list, and whose clause
+// list is the merged lists of individual leaf clauses.
+//
+// *** At the moment it assumes that the individual constructs and their clauses
+// *** are a subset of those created by splitting a valid compound construct.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_FRONTEND_OPENMP_CONSTRUCTCOMPOSITIONT_H
+#define LLVM_FRONTEND_OPENMP_CONSTRUCTCOMPOSITIONT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Frontend/OpenMP/ClauseT.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
+
+#include <iterator>
+#include <optional>
+#include <tuple>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+
+namespace detail {
+template <typename T> struct trivial_hash {
+  size_t operator()(const T &) const { return 1u; }
+};
+} // namespace detail
+
+namespace tomp {
+template <typename ClauseType> struct ConstructCompositionT {
+  using ClauseTy = ClauseType;
+
+  using TypeTy = typename ClauseTy::TypeTy;
+  using IdTy = typename ClauseTy::IdTy;
+  using ExprTy = typename ClauseTy::ExprTy;
+
+  ConstructCompositionT(uint32_t version,
+                        llvm::ArrayRef<DirectiveWithClauses<ClauseTy>> leafs);
+
+  DirectiveWithClauses<ClauseTy> merged;
+
+private:
+  using ClauseSet =
+      std::unordered_set<ClauseTy, detail::trivial_hash<ClauseTy>>;
+
+  enum class Presence {
+    All,  // Clause is preesnt on all leaf constructs that allow it.
+    Some, // Clause is present on some, but not on all constructs.
+    None, // Clause is absent on all constructs.
+  };
+
+  template <typename S>
+  ClauseTy makeClause(llvm::omp::Clause clauseId, S &&specific) {
+    return ClauseTy{clauseId, std::move(specific)};
+  }
+
+  llvm::omp::Directive
+  makeCompound(llvm::ArrayRef<DirectiveWithClauses<ClauseTy>> parts);
+
+  Presence checkPresence(llvm::omp::Clause clauseId);
+
+  // There are clauses that need special handling:
+  // 1. "if": the "directive-name-modifier" on the merged clause may need
+  // to be set appropriately.
+  // 2. "reduction": implies "privateness" of all objects (incompatible
+  // with "shared"); there are rules for merging modifiers
+  void mergeIf();
+  void mergeReduction();
+  void mergeDSA();
+
+  uint32_t version;
+  llvm::ArrayRef<DirectiveWithClauses<ClauseTy>> leafs;
+
+  // clause id -> set of leaf constructs that contain it
+  std::unordered_map<llvm::omp::Clause, llvm::BitVector> clausePresence;
+  // clause id -> set of instances of that clause
+  std::unordered_map<llvm::omp::Clause, ClauseSet> clauseSets;
+};
+
+template <typename C>
+ConstructCompositionT<C>::ConstructCompositionT(
+    uint32_t version, llvm::ArrayRef<DirectiveWithClauses<C>> leafs)
+    : version(version), leafs(leafs) {
+  // Merge the list of constructs with clauses into a compound construct
+  // with a single list of clauses.
+  // The intended use of this function is in splitting compound constructs,
+  // while preserving composite constituent constructs:
+  // Step 1: split compound construct into leaf constructs.
+  // Step 2: identify composite sub-construct, and merge the constituent leafs.
+  //
+  // *** At the moment it assumes that the individual constructs and their
+  // *** clauses are a subset of those created by splitting a valid compound
+  // *** construct.
+  //
+  // 1. Deduplicate clauses
+  //    - exact duplicates: e.g. shared(x) shared(x) -> shared(x)
+  //    - special cases of clauses differing in modifier:
+  //      (a) reduction: inscan + (none|default) = inscan
+  //      (b) reduction: task + (none|default) = task
+  //      (c) combine repeated "if" clauses if possible
+  // 2. Merge DSA clauses: e.g. private(x) private(y) -> private(x, y).
+  // 3. Resolve potential DSA conflicts (typically due to implied clauses).
+
+  if (leafs.empty())
+    return;
+
+  merged.id = makeCompound(leafs);
+
+  // Populate the two maps:
+  for (const auto &[index, leaf] : llvm::enumerate(leafs)) {
+    for (const auto &clause : leaf.clauses) {
+      // Update clausePresence.
+      auto &set = clausePresence[clause.id];
+      if (set.size() < leafs.size())
+        set.resize(leafs.size());
+      set.set(index);
+      // Update clauseSets.
+      clauseSets[clause.id].insert(clause);
+    }
+  }
+
+  mergeIf();
+  mergeReduction();
+  mergeDSA();
+
+  // Fir the rest of the clauses, just copy them.
+  for (auto &[id, clauses] : clauseSets) {
+    // Skip clauses we've already dealt with.
+    switch (id) {
+    case llvm::omp::Clause::OMPC_if:
+    case llvm::omp::Clause::OMPC_reduction:
+    case llvm::omp::Clause::OMPC_shared:
+    case llvm::omp::Clause::OMPC_private:
+    case llvm::omp::Clause::OMPC_firstprivate:
+    case llvm::omp::Clause::OMPC_lastprivate:
+      continue;
+    default:
+      break;
+    }
+    llvm::append_range(merged.clauses, clauses);
+  }
+}
+
+template <typename C>
+llvm::omp::Directive ConstructCompositionT<C>::makeCompound(
+    llvm::ArrayRef<DirectiveWithClauses<ClauseTy>> parts) {
+  llvm::SmallVector<llvm::omp::Directive> dirIds;
+  llvm::transform(parts, std::back_inserter(dirIds),
+                  [](auto &&dwc) { return dwc.id; });
+
+  return llvm::omp::getCompoundConstruct(dirIds);
+}
+
+template <typename C>
+auto ConstructCompositionT<C>::checkPresence(llvm::omp::Clause clauseId)
+    -> Presence {
+  auto found = clausePresence.find(clauseId);
+  if (found == clausePresence.end())
+    return Presence::None;
+
+  bool OnAll = true, OnNone = true;
+  for (const auto &[index, leaf] : llvm::enumerate(leafs)) {
+    if (!llvm::omp::isAllowedClauseForDirective(leaf.id, clauseId, version))
+      continue;
+
+    if (found->second.test(index))
+      OnNone = false;
+    else
+      OnAll = false;
+  }
+
+  if (OnNone)
+    return Presence::None;
+  if (OnAll)
+    return Presence::All;
+  return Presence::Some;
+}
+
+template <typename C> void ConstructCompositionT<C>::mergeIf() {
+  using IfTy = tomp::clause::IfT<TypeTy, IdTy, ExprTy>;
+  // Deal with the "if" clauses. If it's on all leafs that allow it, then it
+  // will apply to the compound construct. Otherwise it will apply to the
+  // single (assumed) leaf construct.
+  // This assumes that the "if" clauses have the same expression.
+  Presence presence = checkPresence(llvm::omp::Clause::OMPC_if);
+  if (presence == Presence::None)
+    return;
+
+  const ClauseTy &some = *clauseSets[llvm::omp::Clause::OMPC_if].begin();
+  const auto &someIf = std::get<IfTy>(some.u);
+
+  if (presence == Presence::All) {
+    // Create "if" without "directive-name-modifier".
+    merged.clauses.emplace_back(
+        makeClause(llvm::omp::Clause::OMPC_if,
+                   IfTy{{/*DirectiveNameModifier=*/std::nullopt,
+                         /*IfExpression=*/std::get<typename IfTy::IfExpression>(
+                             someIf.t)}}));
+  } else {
+    // Find out where it's present and create "if" with the corresponding
+    // "directive-name-modifier".
+    int Idx = clausePresence[llvm::omp::Clause::OMPC_if].find_first();
+    assert(Idx >= 0);
+    merged.clauses.emplace_back(
+        makeClause(llvm::omp::Clause::OMPC_if,
+                   IfTy{{/*DirectiveNameModifier=*/leafs[Idx].id,
+                         /*IfExpression=*/std::get<typename IfTy::IfExpression>(
+                             someIf.t)}}));
+  }
+}
+
+template <typename C> void ConstructCompositionT<C>::mergeReduction() {
+  Presence presence = checkPresence(llvm::omp::Clause::OMPC_reduction);
+  if (presence == Presence::None)
+    return;
+
+  using ReductionTy = tomp::clause::ReductionT<TypeTy, IdTy, ExprTy>;
+  using ModifierTy = typename ReductionTy::ReductionModifier;
+  using IdentifiersTy = typename ReductionTy::ReductionIdentifiers;
+  using ListTy = typename ReductionTy::List;
+  // There are exceptions on which constructs "reduction" may appear
+  // (specifically "parallel", and "teams"). Assume that if "reduction"
+  // is present, it can be applied to the compound construct.
+
+  // What's left is to see if there are any modifiers present. Again,
+  // assume that there are no conflicting modifiers.
+  // There can be, however, multiple reductions on different objects.
+  auto equal = [](const ClauseTy &red1, const ClauseTy &red2) {
+    // Extract actual reductions.
+    const auto r1 = std::get<ReductionTy>(red1.u);
+    const auto r2 = std::get<ReductionTy>(red2.u);
+    // Compare everything except modifiers.
+    if (std::get<IdentifiersTy>(r1.t) != std::get<IdentifiersTy>(r2.t))
+      return false;
+    if (std::get<ListTy>(r1.t) != std::get<ListTy>(r2.t))
+      return false;
+    return true;
+  };
+
+  auto getModifier = [](const ClauseTy &clause) {
+    const ReductionTy &red = std::get<ReductionTy>(clause.u);
+    return std::get<std::optional<ModifierTy>>(red.t);
+  };
+
+  const ClauseSet &reductions = clauseSets[llvm::omp::Clause::OMPC_reduction];
+  std::unordered_set<const ClauseTy *> visited;
+  while (reductions.size() != visited.size()) {
+    typename ClauseSet::const_iterator first;
+
+    // Find first non-visited reduction.
+    for (first = reductions.begin(); first != reductions.end(); ++first) {
+      if (visited.count(&*first))
+        continue;
+      visited.insert(&*first);
+      break;
+    }
+
+    std::optional<ModifierTy> modifier = getModifier(*first);
+
+    // Visit all other reductions that are "equal" (with respect to the
+    // definition above) to "first". Collect modifiers.
+    for (auto iter = std::next(first); iter != reductions.end(); ++iter) {
+      if (!equal(*first, *iter))
+        continue;
+      visited.insert(&*iter);
+      if (!modifier || *modifier == ModifierTy::Default)
+        modifier = getModifier(*iter);
+    }
+
+    const auto &firstRed = std::get<ReductionTy>(first->u);
+    merged.clauses.emplace_back(makeClause(
+        llvm::omp::Clause::OMPC_reduction,
+        ReductionTy{
+            {/*ReductionModifier=*/modifier,
+             /*ReductionIdentifiers=*/std::get<IdentifiersTy>(firstRed.t),
+             /*List=*/std::get<ListTy>(firstRed.t)}}));
+  }
+}
+
+template <typename C> void ConstructCompositionT<C>::mergeDSA() {
+  using ObjectTy = tomp::type::ObjectT<IdTy, ExprTy>;
+
+  struct Hash {
+    bool operator()(const ObjectTy &object) const {
+      return std::hash<llvm::remove_cvref_t<IdTy>>()(object.id());
+    }
+  };
+
+  // Resolve data-sharing attributes.
+  enum DSA : int {
+    None = 0,
+    Shared = 1 << 0,
+    Private = 1 << 1,
+    FirstPrivate = 1 << 2,
+    LastPrivate = 1 << 3,
+    LastPrivateConditional = 1 << 4,
+  };
+
+  // The operator[] will value-initialize (i.e. zero-initialize) the "int"
+  // mapped-value if the given key is not present.
+  std::unordered_map<ObjectTy, int, Hash> objectDsa;
+
+  using SharedTy = tomp::clause::SharedT<TypeTy, IdTy, ExprTy>;
+  using PrivateTy = tomp::clause::PrivateT<TypeTy, IdTy, ExprTy>;
+  using FirstprivateTy = tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy>;
+  using LastprivateTy = tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy>;
+
+  // Visit clauses that affect DSA.
+  for (auto &clause : clauseSets[llvm::omp::Clause::OMPC_shared]) {
+    for (auto &object : std::get<SharedTy>(clause.u).v)
+      objectDsa[object] |= DSA::Shared;
+  }
+
+  for (auto &clause : clauseSets[llvm::omp::Clause::OMPC_private]) {
+    for (auto &object : std::get<PrivateTy>(clause.u).v)
+      objectDsa[object] |= DSA::Private;
+  }
+
+  for (auto &clause : clauseSets[llvm::omp::Clause::OMPC_firstprivate]) {
+    for (auto &object : std::get<FirstprivateTy>(clause.u).v)
+      objectDsa[object] |= DSA::FirstPrivate;
+  }
+
+  for (auto &clause : clauseSets[llvm::omp::Clause::OMPC_lastprivate]) {
+    using ModifierTy = typename LastprivateTy::LastprivateModifier;
+    using ListTy = typename LastprivateTy::List;
+    const auto &lastp = std::get<LastprivateTy>(clause.u);
+    for (auto &object : std::get<ListTy>(lastp.t)) {
+      auto &mod = std::get<std::optional<ModifierTy>>(lastp.t);
+      if (mod && *mod == ModifierTy::Conditional) {
+        objectDsa[object] |= DSA::LastPrivateConditional;
+      } else {
+        objectDsa[object] |= DSA::LastPrivate;
+      }
+    }
+  }
+
+  // Check reductions as well, clear "shared" if set.
+  for (auto &clause : clauseSets[llvm::omp::Clause::OMPC_reduction]) {
+    using ReductionTy = tomp::clause::ReductionT<TypeTy, IdTy, ExprTy>;
+    using ListTy = typename ReductionTy::List;
+    for (auto &object : std::get<ListTy>(std::get<ReductionTy>(clause.u).t))
+      objectDsa[object] &= ~DSA::Shared;
+  }
+
+  tomp::ListT<ObjectTy> privateObj, sharedObj, firstpObj, lastpObj, lastpcObj;
+  for (auto &[object, dsa] : objectDsa) {
+    if (dsa &
+        (DSA::FirstPrivate | DSA::LastPrivate | DSA::LastPrivateConditional)) {
+      if (dsa & DSA::FirstPrivate)
+        firstpObj.push_back(object); // no else
+      if (dsa & DSA::LastPrivateConditional)
+        lastpcObj.push_back(object);
+      else if (dsa & DSA::LastPrivate)
+        lastpObj.push_back(object);
+    } else if (dsa & DSA::Private) {
+      privateObj.push_back(object);
+    } else if (dsa & DSA::Shared) {
+      sharedObj.push_back(object);
+    }
+  }
+
+  // Materialize each clause.
+  if (!privateObj.empty()) {
+    merged.clauses.emplace_back(
+        makeClause(llvm::omp::Clause::OMPC_private,
+                   PrivateTy{/*List=*/std::move(privateObj)}));
+  }
+  if (!sharedObj.empty()) {
+    merged.clauses.emplace_back(
+        makeClause(llvm::omp::Clause::OMPC_shared,
+                   SharedTy{/*List=*/std::move(sharedObj)}));
+  }
+  if (!firstpObj.empty()) {
+    merged.clauses.emplace_back(
+        makeClause(llvm::omp::Clause::OMPC_firstprivate,
+                   FirstprivateTy{/*List=*/std::move(firstpObj)}));
+  }
+  if (!lastpObj.empty()) {
+    merged.clauses.emplace_back(
+        makeClause(llvm::omp::Clause::OMPC_lastprivate,
+                   LastprivateTy{{/*LastprivateModifier=*/std::nullopt,
+                                  /*List=*/std::move(lastpObj)}}));
+  }
+  if (!lastpcObj.empty()) {
+    auto conditional = LastprivateTy::LastprivateModifier::Conditional;
+    merged.clauses.emplace_back(
+        makeClause(llvm::omp::Clause::OMPC_lastprivate,
+                   LastprivateTy{{/*LastprivateModifier=*/conditional,
+                                  /*List=*/std::move(lastpcObj)}}));
+  }
+}
+} // namespace tomp
+
+#endif // LLVM_FRONTEND_OPENMP_CONSTRUCTCOMPOSITIONT_H
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
index f2a3c0d327fd46..c8e7eb4b18b57e 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -13,14 +13,12 @@
 //
 // Note: Composite constructs will also be broken up into leaf constructs.
 // If composite constructs require processing as a whole, the lists of clauses
-// for each leaf constituent should be concatenated.
+// for each leaf constituent should be merged.
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_FRONTEND_OPENMP_CONSTRUCTDECOMPOSITIONT_H
 #define LLVM_FRONTEND_OPENMP_CONSTRUCTDECOMPOSITIONT_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator_range.h"
@@ -32,6 +30,8 @@
 #include <optional>
 #include <tuple>
 #include <type_traits>
+#include <unordered_map>
+#include <unordered_set>
 #include <utility>
 #include <variant>
 
@@ -64,15 +64,22 @@ find_unique(Container &&container, Predicate &&pred) {
     return first;
   return container.end();
 }
+
 } // namespace detail
 
 namespace tomp {
 
-template <typename ClauseType> struct DirectiveWithClauses {
-  llvm::omp::Directive id = llvm::omp::Directive::OMPD_unknown;
-  tomp::type::ListT<ClauseType> clauses;
-};
-
+// ClauseType - Either instance of ClauseT, or a type derived from ClauseT.
+//
+// This is the clause representation in the code using this infrastructure.
+//
+// HelperType - A class that implements two member functions:
+//
+//   // Return the base object of the given object, if any.
+//   std::optional<Object> getBaseObject(const Object &object) const
+//   // Return the iteration variable of the outermost loop associated
+//   // with the construct being worked on, if any.
+//   std::optional<Object> getLoopIterVar() const
 template <typename ClauseType, typename HelperType>
 struct ConstructDecompositionT {
   using ClauseTy = ClauseType;
@@ -83,7 +90,7 @@ struct ConstructDecompositionT {
   using HelperTy = HelperType;
   using ObjectTy = tomp::ObjectT<IdTy, ExprTy>;
 
-  using ClauseSet = llvm::DenseSet<const ClauseTy *>;
+  using ClauseSet = std::unordered_set<const ClauseTy *>;
 
   ConstructDecompositionT(uint32_t ver, HelperType &hlp,
                           llvm::omp::Directive dir,
@@ -93,17 +100,18 @@ struct ConstructDecompositionT {
       nodes.push_back(&clause);
 
     bool success = split();
-    if (success) {
-      // Copy the broken down directives with their clauses to the
-      // output list. Copy by value, since we don't own the storage
-      // with the input clauses, and the internal representation uses
-      // clause addresses.
-      for (auto &leaf : leafs) {
-        output.push_back({leaf.id});
-        auto &dwc = output.back();
-        for (const ClauseTy *c : leaf.clauses)
-          dwc.clauses.push_back(*c);
-      }
+    if (!success)
+      return;
+
+    // Copy the individual leaf directives with their clauses to the
+    // output list. Copy by value, since we don't own the storage
+    // with the input clauses, and the internal representation uses
+    // clause addresses.
+    for (auto &leaf : leafs) {
+      output.push_back({leaf.id});
+      auto &out = output.back();
+      for (const ClauseTy *c : leaf.clauses)
+        out.clauses.push_back(*c);
     }
   }
 
@@ -237,8 +245,8 @@ struct ConstructDecompositionT {
   tomp::ListT<const ClauseTy *> nodes;
   std::list<ClauseTy> implicit; // Container for materialized implicit clauses.
                                 // Inserting must preserve element addresses.
-  llvm::DenseMap<IdTy, ClauseSet> syms;
-  llvm::DenseSet<IdTy> mapBases;
+  std::unordered_map<IdTy, ClauseSet> syms;
+  std::unordered_set<IdTy> mapBases;
 };
 
 // Deduction guide
@@ -385,10 +393,10 @@ bool ConstructDecompositionT<C, H>::applyToFirst(
   if (range.empty())
     return false;
 
-  for (auto &dwc : range) {
-    if (!llvm::omp::isAllowedClauseForDirective(dwc.id, node->id, version))
+  for (auto &leaf : range) {
+    if (!llvm::omp::isAllowedClauseForDirective(leaf.id, node->id, version))
       continue;
-    dwc.clauses.push_back(node);
+    leaf.clauses.push_back(node);
     return true;
   }
   return false;
@@ -413,12 +421,12 @@ template <typename Predicate>
 bool ConstructDecompositionT<C, H>::applyIf(const ClauseTy *node,
                                             Predicate shouldApply) {
   bool applied = false;
-  for (auto &dwc : leafs) {
-    if (!llvm::omp::isAllowedClauseForDirective(dwc.id, node->id, version))
+  for (auto &leaf : leafs) {
+    if (!llvm::omp::isAllowedClauseForDirective(leaf.id, node->id, version))
       continue;
-    if (!shouldApply(dwc))
+    if (!shouldApply(leaf))
       continue;
-    dwc.clauses.push_back(node);
+    leaf.clauses.push_back(node);
     applied = true;
   }
 
@@ -448,11 +456,17 @@ bool ConstructDecompositionT<C, H>::applyClause(Clause &&clause,
 }
 
 // COLLAPSE
+// [5.2:93:20-21]
+// Directives: distribute, do, for, loop, simd, taskloop
+//
+// [5.2:339:35]
+// (35) The collapse clause is applied once to the combined or composite
+// construct.
 template <typename C, typename H>
 bool ConstructDecompositionT<C, H>::applyClause(
     const tomp::clause::CollapseT<TypeTy, IdTy, ExprTy> &clause,
     const ClauseTy *node) {
-  // Apply COLLAPSE to the innermost directive. If it's not one that
+  // Apply "collapse" to the innermost directive. If it's not one that
   // allows it flag an error.
   if (!leafs.empty()) {
     auto &last = leafs.back();
@@ -467,6 +481,13 @@ bool ConstructDecompositionT<C, H>::applyClause(
 }
 
 // PRIVATE
+// [5.2:111:5-7]
+// Directives: distribute, do, for, loop, parallel, scope, sections, simd,
+// single, target, task, taskloop, teams
+//
+// [5.2:340:1-2]
+// (1) The effect of the 1 private clause is as if it is applied only to the
+// innermost leaf construct that permits it.
 template <typename C, typename H>
 bool ConstructDecompositionT<C, H>::applyClause(
     const tomp::clause::PrivateT<TypeTy, IdTy, ExprTy> &clause,
@@ -475,28 +496,48 @@ bool ConstructDecompositionT<C, H>::applyClause(
 }
 
 // FIRSTPRIVATE
+// [5.2:112:5-7]
+// Directives: distribute, do, for, parallel, scope, sections, single, target,
+// task, taskloop, teams
+//
+// [5.2:340:3-20]
+// (3) The effect of the firstprivate clause is as if it is applied to one or
+// more leaf constructs as follows:
+//  (5) To the distribute construct if it is among the constituent constructs;
+//  (6) To the teams construct if it is among the constituent constructs and the
+//      distribute construct is not;
+//  (8) To a worksharing construct that accepts the clause if one is among the
+//      constituent constructs;
+//  (9) To the taskloop construct if it is among the constituent constructs;
+// (10) To the parallel construct if it is among the constituent constructs and
+//      neither a taskloop construct nor a worksharing construct that accepts
+//      the clause is among them;
+// (12) To the target construct if it is among the constituent constructs and
+//      the same list item neither appears in a lastprivate clause nor is the
+//      base variable or base pointer of a list item that appears in a map
+//      clause.
+//
+// (15) If the parallel construct is among the constituent constructs and the
+// effect is not as if the firstprivate clause is applied to it by the above
+// rules, then the effect is as if the shared clause with the same list item is
+// applied to the parallel construct.
+// (17) If the teams construct is among the constituent constructs and the
+// effect is not as if the firstprivate clause is applied to it by the above
+// rules, then the effect is as if the shared clause with the same list item is
+// applied to the teams construct.
 template <typename C, typename H>
 bool ConstructDecompositionT<C, H>::applyClause(
     const tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy> &clause,
     const ClauseTy *node) {
   bool applied = false;
 
-  // S Section 17.2
-  // S The effect of the firstprivate clause is as if it is applied to one
-  // S or more leaf constructs as follows:
-
-  // S - To the distribute construct if it is among the constituent constructs;
-  // S - To the teams construct if it is among the constituent constructs and
-  // S   the distribute construct is not;
+  // [5.2:340:3-6]
   auto hasDistribute = findDirective(llvm::omp::OMPD_distribute);
   auto hasTeams = findDirective(llvm::omp::OMPD_teams);
   if (hasDistribute != nullptr) {
     hasDistribute->clauses.push_back(node);
     applied = true;
-    // S If the teams construct is among the constituent constructs and the
-    // S effect is not as if the firstprivate clause is applied to it by the
-    // S above rules, then the effect is as if the shared clause with the
-    // S same list item is applied to the teams construct.
+    // [5.2:340:17]
     if (hasTeams != nullptr) {
       auto *shared = makeClause(
           llvm::omp::Clause::OMPC_shared,
@@ -508,14 +549,13 @@ bool ConstructDecompositionT<C, H>::applyClause(
     applied = true;
   }
 
-  // S - To a worksharing construct that accepts the clause if one is among
-  // S   the constituent constructs;
+  // [5.2:340:8]
   auto findWorksharing = [&]() {
     auto worksharing = getWorksharing();
-    for (auto &dwc : leafs) {
-      auto found = llvm::find(worksharing, dwc.id);
+    for (auto &leaf : leafs) {
+      auto found = llvm::find(worksharing, leaf.id);
       if (found != std::end(worksharing))
-        return &dwc;
+        return &leaf;
     }
     return static_cast<typename decltype(leafs)::value_type *>(nullptr);
   };
@@ -526,26 +566,21 @@ bool ConstructDecompositionT<C, H>::applyClause(
     applied = true;
   }
 
-  // S - To the taskloop construct if it is among the constituent constructs;
+  // [5.2:340:9]
   auto hasTaskloop = findDirective(llvm::omp::OMPD_taskloop);
   if (hasTaskloop != nullptr) {
     hasTaskloop->clauses.push_back(node);
     applied = true;
   }
 
-  // S - To the parallel construct if it is among the constituent constructs
-  // S   and neither a taskloop construct nor a worksharing construct that
-  // S   accepts the clause is among them;
+  // [5.2:340:10]
   auto hasParallel = findDirective(llvm::omp::OMPD_parallel);
   if (hasParallel != nullptr) {
     if (hasTaskloop == nullptr && hasWorksharing == nullptr) {
       hasParallel->clauses.push_back(node);
       applied = true;
     } else {
-      // S If the parallel construct is among the constituent constructs and
-      // S the effect is not as if the firstprivate clause is applied to it by
-      // S the above rules, then the effect is as if the shared clause with
-      // S the same list item is applied to the parallel construct.
+      // [5.2:340:15]
       auto *shared = makeClause(
           llvm::omp::Clause::OMPC_shared,
           tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/clause.v});
@@ -553,10 +588,7 @@ bool ConstructDecompositionT<C, H>::applyClause(
     }
   }
 
-  // S - To the target construct if it is among the constituent constructs
-  // S   and the same list item neither appears in a lastprivate clause nor
-  // S   is the base variable or base pointer of a list item that appears in
-  // S   a map clause.
+  // [5.2:340:12]
   auto inLastprivate = [&](const ObjectTy &object) {
     if (ClauseSet *set = findClausesWith(object)) {
       return llvm::find_if(*set, [](const ClauseTy *c) {
@@ -571,7 +603,7 @@ bool ConstructDecompositionT<C, H>::applyClause(
     tomp::ObjectListT<IdTy, ExprTy> objects;
     llvm::copy_if(
         clause.v, std::back_inserter(objects), [&](const ObjectTy &object) {
-          return !inLastprivate(object) && !mapBases.contains(object.id());
+          return !inLastprivate(object) && !mapBases.count(object.id());
         });
     if (!objects.empty()) {
       auto *firstp = makeClause(
@@ -582,18 +614,41 @@ bool ConstructDecompositionT<C, H>::applyClause(
     }
   }
 
+  // "task" is not handled by any of the cases above.
+  if (auto hasTask = findDirective(llvm::omp::OMPD_task)) {
+    hasTask->clauses.push_back(node);
+    applied = true;
+  }
+
   return applied;
 }
 
 // LASTPRIVATE
+// [5.2:115:7-8]
+// Directives: distribute, do, for, loop, sections, simd, taskloop
+//
+// [5.2:340:21-30]
+// (21) The effect of the lastprivate clause is as if it is applied to all leaf
+// constructs that permit the clause.
+// (22) If the parallel construct is among the constituent constructs and the
+// list item is not also specified in the firstprivate clause, then the effect
+// of the lastprivate clause is as if the shared clause with the same list item
+// is applied to the parallel construct.
+// (24) If the teams construct is among the constituent constructs and the list
+// item is not also specified in the firstprivate clause, then the effect of the
+// lastprivate clause is as if the shared clause with the same list item is
+// applied to the teams construct.
+// (27) If the target construct is among the constituent constructs and the list
+// item is not the base variable or base pointer of a list item that appears in
+// a map clause, the effect of the lastprivate clause is as if the same list
+// item appears in a map clause with a map-type of tofrom.
 template <typename C, typename H>
 bool ConstructDecompositionT<C, H>::applyClause(
     const tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy> &clause,
     const ClauseTy *node) {
   bool applied = false;
 
-  // S The effect of the lastprivate clause is as if it is applied to all leaf
-  // S constructs that permit the clause.
+  // [5.2:340:21]
   applied = applyToAll(node);
   if (!applied)
     return false;
@@ -609,17 +664,14 @@ bool ConstructDecompositionT<C, H>::applyClause(
 
   auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(clause.t);
 
-  // Prepare list of objects that could end up in a SHARED clause.
+  // Prepare list of objects that could end up in a "shared" clause.
   tomp::ObjectListT<IdTy, ExprTy> sharedObjects;
   llvm::copy_if(
       objects, std::back_inserter(sharedObjects),
       [&](const ObjectTy &object) { return !inFirstprivate(object); });
 
   if (!sharedObjects.empty()) {
-    // S If the parallel construct is among the constituent constructs and the
-    // S list item is not also specified in the firstprivate clause, then the
-    // S effect of the lastprivate clause is as if the shared clause with the
-    // S same list item is applied to the parallel construct.
+    // [5.2:340:22]
     if (auto hasParallel = findDirective(llvm::omp::OMPD_parallel)) {
       auto *shared = makeClause(
           llvm::omp::Clause::OMPC_shared,
@@ -628,10 +680,7 @@ bool ConstructDecompositionT<C, H>::applyClause(
       applied = true;
     }
 
-    // S If the teams construct is among the constituent constructs and the
-    // S list item is not also specified in the firstprivate clause, then the
-    // S effect of the lastprivate clause is as if the shared clause with the
-    // S same list item is applied to the teams construct.
+    // [5.2:340:24]
     if (auto hasTeams = findDirective(llvm::omp::OMPD_teams)) {
       auto *shared = makeClause(
           llvm::omp::Clause::OMPC_shared,
@@ -641,16 +690,12 @@ bool ConstructDecompositionT<C, H>::applyClause(
     }
   }
 
-  // S If the target construct is among the constituent constructs and the
-  // S list item is not the base variable or base pointer of a list item that
-  // S appears in a map clause, the effect of the lastprivate clause is as if
-  // S the same list item appears in a map clause with a map-type of tofrom.
+  // [5.2:340:27]
   if (auto hasTarget = findDirective(llvm::omp::OMPD_target)) {
     tomp::ObjectListT<IdTy, ExprTy> tofrom;
-    llvm::copy_if(objects, std::back_inserter(tofrom),
-                  [&](const ObjectTy &object) {
-                    return !mapBases.contains(object.id());
-                  });
+    llvm::copy_if(
+        objects, std::back_inserter(tofrom),
+        [&](const ObjectTy &object) { return !mapBases.count(object.id()); });
 
     if (!tofrom.empty()) {
       using MapType =
@@ -671,42 +716,74 @@ bool ConstructDecompositionT<C, H>::applyClause(
 }
 
 // SHARED
+// [5.2:110:5-6]
+// Directives: parallel, task, taskloop, teams
+//
+// [5.2:340:31-32]
+// (31) The effect of the shared, default, thread_limit, or order clause is as
+// if it is applied to all leaf constructs that permit the clause.
 template <typename C, typename H>
 bool ConstructDecompositionT<C, H>::applyClause(
     const tomp::clause::SharedT<TypeTy, IdTy, ExprTy> &clause,
     const ClauseTy *node) {
-  // Apply SHARED to the all leafs that allow it.
+  // [5.2:340:31]
   return applyToAll(node);
 }
 
 // DEFAULT
+// [5.2:109:5-6]
+// Directives: parallel, task, taskloop, teams
+//
+// [5.2:340:31-32]
+// (31) The effect of the shared, default, thread_limit, or order clause is as
+// if it is applied to all leaf constructs that permit the clause.
 template <typename C, typename H>
 bool ConstructDecompositionT<C, H>::applyClause(
     const tomp::clause::DefaultT<TypeTy, IdTy, ExprTy> &clause,
     const ClauseTy *node) {
-  // Apply DEFAULT to the all leafs that allow it.
+  // [5.2:340:31]
   return applyToAll(node);
 }
 
 // THREAD_LIMIT
+// [5.2:277:14-15]
+// Directives: target, teams
+//
+// [5.2:340:31-32]
+// (31) The effect of the shared, default, thread_limit, or order clause is as
+// if it is applied to all leaf constructs that permit the clause.
 template <typename C, typename H>
 bool ConstructDecompositionT<C, H>::applyClause(
     const tomp::clause::ThreadLimitT<TypeTy, IdTy, ExprTy> &clause,
     const ClauseTy *node) {
-  // Apply THREAD_LIMIT to the all leafs that allow it.
+  // [5.2:340:31]
   return applyToAll(node);
 }
 
 // ORDER
+// [5.2:234:3-4]
+// Directives: distribute, do, for, loop, simd
+//
+// [5.2:340:31-32]
+// (31) The effect of the shared, default, thread_limit, or order clause is as
+// if it is applied to all leaf constructs that permit the clause.
 template <typename C, typename H>
 bool ConstructDecompositionT<C, H>::applyClause(
     const tomp::clause::OrderT<TypeTy, IdTy, ExprTy> &clause,
     const ClauseTy *node) {
-  // Apply ORDER to the all leafs that allow it.
+  // [5.2:340:31]
   return applyToAll(node);
 }
 
 // ALLOCATE
+// [5.2:178:7-9]
+// Directives: allocators, distribute, do, for, parallel, scope, sections,
+// single, target, task, taskgroup, taskloop, teams
+//
+// [5.2:340:33-35]
+// (33) The effect of the allocate clause is as if it is applied to all leaf
+// constructs that permit the clause and to which a data-sharing attribute
+// clause that may create a private copy of the same list item is applied.
 template <typename C, typename H>
 bool ConstructDecompositionT<C, H>::applyClause(
     const tomp::clause::AllocateT<TypeTy, IdTy, ExprTy> &clause,
@@ -714,10 +791,7 @@ bool ConstructDecompositionT<C, H>::applyClause(
   // This one needs to be applied at the end, once we know which clauses are
   // assigned to which leaf constructs.
 
-  // S The effect of the allocate clause is as if it is applied to all leaf
-  // S constructs that permit the clause and to which a data-sharing attribute
-  // S clause that may create a private copy of the same list item is applied.
-
+  // [5.2:340:33]
   auto canMakePrivateCopy = [](llvm::omp::Clause id) {
     switch (id) {
     case llvm::omp::Clause::OMPC_firstprivate:
@@ -729,8 +803,8 @@ bool ConstructDecompositionT<C, H>::applyClause(
     }
   };
 
-  bool applied = applyIf(node, [&](const auto &dwc) {
-    return llvm::any_of(dwc.clauses, [&](const ClauseTy *n) {
+  bool applied = applyIf(node, [&](const auto &leaf) {
+    return llvm::any_of(leaf.clauses, [&](const ClauseTy *n) {
       return canMakePrivateCopy(n->id);
     });
   });
@@ -739,21 +813,44 @@ bool ConstructDecompositionT<C, H>::applyClause(
 }
 
 // REDUCTION
+// [5.2:134:17-18]
+// Directives: do, for, loop, parallel, scope, sections, simd, taskloop, teams
+//
+// [5.2:340:36-37], [5.2:341:1-13]
+// (36) The effect of the reduction clause is as if it is applied to all leaf
+// constructs that permit the clause, except for the following constructs:
+//  (1) The parallel construct, when combined with the sections,
+//      worksharing-loop, loop, or taskloop construct; and
+//  (3) The teams construct, when combined with the loop construct.
+// (4) For the parallel and teams constructs above, the effect of the reduction
+// clause instead is as if each list item or, for any list item that is an array
+// item, its corresponding base array or base pointer appears in a shared clause
+// for the construct.
+// (6) If the task reduction-modifier is specified, the effect is as if it only
+// modifies the behavior of the reduction clause on the innermost leaf construct
+// that accepts the modifier (see Section 5.5.8).
+// (8) If the inscan reduction-modifier is specified, the effect is as if it
+// modifies the behavior of the reduction clause on all constructs of the
+// combined construct to which the clause is applied and that accept the
+// modifier.
+// (10) If a list item in a reduction clause on a combined target construct does
+// not have the same base variable or base pointer as a list item in a map
+// clause on the construct, then the effect is as if the list item in the
+// reduction clause appears as a list item in a map clause with a map-type of
+// tofrom.
 template <typename C, typename H>
 bool ConstructDecompositionT<C, H>::applyClause(
     const tomp::clause::ReductionT<TypeTy, IdTy, ExprTy> &clause,
     const ClauseTy *node) {
-  // S The effect of the reduction clause is as if it is applied to all leaf
-  // S constructs that permit the clause, except for the following constructs:
-  // S - The parallel construct, when combined with the sections, worksharing-
-  // S   loop, loop, or taskloop construct; and
-  // S - The teams construct, when combined with the loop construct.
+  using ReductionTy = tomp::clause::ReductionT<TypeTy, IdTy, ExprTy>;
+
+  // [5.2:340:36], [5.2:341:1], [5.2:341:3]
   bool applyToParallel = true, applyToTeams = true;
 
   auto hasParallel = findDirective(llvm::omp::Directive::OMPD_parallel);
   if (hasParallel) {
     auto exclusions = llvm::concat<const llvm::omp::Directive>(
-        getWorksharingLoop(), llvm::ArrayRef{
+        getWorksharingLoop(), tomp::ListT<llvm::omp::Directive>{
                                   llvm::omp::Directive::OMPD_loop,
                                   llvm::omp::Directive::OMPD_sections,
                                   llvm::omp::Directive::OMPD_taskloop,
@@ -773,7 +870,72 @@ bool ConstructDecompositionT<C, H>::applyClause(
       applyToTeams = false;
   }
 
+  using ReductionModifier = typename ReductionTy::ReductionModifier;
+  using ReductionIdentifiers = typename ReductionTy::ReductionIdentifiers;
+
   auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(clause.t);
+  auto &modifier = std::get<std::optional<ReductionModifier>>(clause.t);
+
+  // Apply the reduction clause first to all directives according to the spec.
+  // If the reduction was applied at least once, proceed with the data sharing
+  // side-effects.
+  bool applied = false;
+
+  // [5.2:341:6], [5.2:341:8]
+  auto isValidModifier = [](llvm::omp::Directive dir, ReductionModifier mod,
+                            bool alreadyApplied) {
+    switch (mod) {
+    case ReductionModifier::Inscan:
+      // According to [5.2:135:11-13], "inscan" only applies to
+      // worksharing-loop, worksharing-loop-simd, or "simd" constructs.
+      return dir == llvm::omp::Directive::OMPD_simd ||
+             llvm::is_contained(getWorksharingLoop(), dir);
+    case ReductionModifier::Task:
+      if (alreadyApplied)
+        return false;
+      // According to [5.2:135:16-18], "task" only applies to "parallel" and
+      // worksharing constructs.
+      return dir == llvm::omp::Directive::OMPD_parallel ||
+             llvm::is_contained(getWorksharing(), dir);
+    case ReductionModifier::Default:
+      return true;
+    }
+    llvm_unreachable("Unexpected modifier");
+  };
+
+  auto *unmodified = makeClause(
+      llvm::omp::Clause::OMPC_reduction,
+      ReductionTy{
+          {/*ReductionModifier=*/std::nullopt,
+           /*ReductionIdentifiers=*/std::get<ReductionIdentifiers>(clause.t),
+           /*List=*/objects}});
+
+  ReductionModifier effective =
+      modifier.has_value() ? *modifier : ReductionModifier::Default;
+  bool effectiveApplied = false;
+  // Walk over the leaf constructs starting from the innermost, and apply
+  // the clause as required by the spec.
+  for (auto &leaf : llvm::reverse(leafs)) {
+    if (!llvm::omp::isAllowedClauseForDirective(leaf.id, node->id, version))
+      continue;
+    if (!applyToParallel && &leaf == hasParallel)
+      continue;
+    if (!applyToTeams && &leaf == hasTeams)
+      continue;
+    // Some form of the clause will be applied past this point.
+    if (isValidModifier(leaf.id, effective, effectiveApplied)) {
+      // Apply clause with modifier.
+      leaf.clauses.push_back(node);
+      effectiveApplied = true;
+    } else {
+      // Apply clause without modifier.
+      leaf.clauses.push_back(unmodified);
+    }
+    applied = true;
+  }
+
+  if (!applied)
+    return false;
 
   tomp::ObjectListT<IdTy, ExprTy> sharedObjects;
   llvm::transform(objects, std::back_inserter(sharedObjects),
@@ -782,10 +944,7 @@ bool ConstructDecompositionT<C, H>::applyClause(
                     return maybeBase ? *maybeBase : object;
                   });
 
-  // S For the parallel and teams constructs above, the effect of the
-  // S reduction clause instead is as if each list item or, for any list
-  // S item that is an array item, its corresponding base array or base
-  // S pointer appears in a shared clause for the construct.
+  // [5.2:341:4]
   if (!sharedObjects.empty()) {
     if (hasParallel && !applyToParallel) {
       auto *shared = makeClause(
@@ -801,35 +960,15 @@ bool ConstructDecompositionT<C, H>::applyClause(
     }
   }
 
-  // TODO(not implemented in parser yet): Apply the following.
-  // S If the task reduction-modifier is specified, the effect is as if
-  // S it only modifies the behavior of the reduction clause on the innermost
-  // S leaf construct that accepts the modifier (see Section 5.5.8). If the
-  // S inscan reduction-modifier is specified, the effect is as if it modifies
-  // S the behavior of the reduction clause on all constructs of the combined
-  // S construct to which the clause is applied and that accept the modifier.
-
-  bool applied = applyIf(node, [&](auto &dwc) {
-    if (!applyToParallel && &dwc == hasParallel)
-      return false;
-    if (!applyToTeams && &dwc == hasTeams)
-      return false;
-    return true;
-  });
-
-  // S If a list item in a reduction clause on a combined target construct
-  // S does not have the same base variable or base pointer as a list item
-  // S in a map clause on the construct, then the effect is as if the list
-  // S item in the reduction clause appears as a list item in a map clause
-  // S with a map-type of tofrom.
+  // [5.2:341:10]
   auto hasTarget = findDirective(llvm::omp::Directive::OMPD_target);
   if (hasTarget && leafs.size() > 1) {
     tomp::ObjectListT<IdTy, ExprTy> tofrom;
     llvm::copy_if(objects, std::back_inserter(tofrom),
                   [&](const ObjectTy &object) {
                     if (auto maybeBase = helper.getBaseObject(object))
-                      return !mapBases.contains(maybeBase->id());
-                    return !mapBases.contains(object.id()); // XXX is this ok?
+                      return !mapBases.count(maybeBase->id());
+                    return !mapBases.count(object.id()); // XXX is this ok?
                   });
     if (!tofrom.empty()) {
       using MapType =
@@ -850,19 +989,35 @@ bool ConstructDecompositionT<C, H>::applyClause(
 }
 
 // IF
+// [5.2:72:7-9]
+// Directives: cancel, parallel, simd, target, target data, target enter data,
+// target exit data, target update, task, taskloop
+//
+// [5.2:72:15-18]
+// (15) For combined or composite constructs, the if clause only applies to the
+// semantics of the construct named in the directive-name-modifier.
+// (16) For a combined or composite construct, if no directive-name-modifier is
+// specified then the if clause applies to all constituent constructs to which
+// an if clause can apply.
 template <typename C, typename H>
 bool ConstructDecompositionT<C, H>::applyClause(
     const tomp::clause::IfT<TypeTy, IdTy, ExprTy> &clause,
     const ClauseTy *node) {
   using DirectiveNameModifier =
       typename clause::IfT<TypeTy, IdTy, ExprTy>::DirectiveNameModifier;
+  using IfExpression = typename clause::IfT<TypeTy, IdTy, ExprTy>::IfExpression;
   auto &modifier = std::get<std::optional<DirectiveNameModifier>>(clause.t);
 
   if (modifier) {
     llvm::omp::Directive dirId = *modifier;
+    auto *unmodified =
+        makeClause(llvm::omp::Clause::OMPC_if,
+                   tomp::clause::IfT<TypeTy, IdTy, ExprTy>{
+                       {/*DirectiveNameModifier=*/std::nullopt,
+                        /*IfExpression=*/std::get<IfExpression>(clause.t)}});
 
     if (auto *hasDir = findDirective(dirId)) {
-      hasDir->clauses.push_back(node);
+      hasDir->clauses.push_back(unmodified);
       return true;
     }
     return false;
@@ -872,45 +1027,42 @@ bool ConstructDecompositionT<C, H>::applyClause(
 }
 
 // LINEAR
+// [5.2:118:1-2]
+// Directives: declare simd, do, for, simd
+//
+// [5.2:341:15-22]
+// (15.1) The effect of the linear clause is as if it is applied to the
+// innermost leaf construct.
+// (15.2) Additionally, if the list item is not the iteration variable of a simd
+// or worksharing-loop SIMD construct, the effect on the outer leaf constructs
+// is as if the list item was specified in firstprivate and lastprivate clauses
+// on the combined or composite construct, with the rules specified above
+// applied.
+// (19) If a list item of the linear clause is the iteration variable of a simd
+// or worksharing-loop SIMD construct and it is not declared in the construct,
+// the effect on the outer leaf constructs is as if the list item was specified
+// in a lastprivate clause on the combined or composite construct with the rules
+// specified above applied.
 template <typename C, typename H>
 bool ConstructDecompositionT<C, H>::applyClause(
     const tomp::clause::LinearT<TypeTy, IdTy, ExprTy> &clause,
     const ClauseTy *node) {
-  // S The effect of the linear clause is as if it is applied to the innermost
-  // S leaf construct.
+  // [5.2:341:15.1]
   if (!applyToInnermost(node))
     return false;
 
-  // The rest is about SIMD.
-  if (!findDirective(llvm::omp::OMPD_simd))
-    return true;
-
-  // S Additionally, if the list item is not the iteration variable of a
-  // S simd or worksharing-loop SIMD construct, the effect on the outer leaf
-  // S constructs is as if the list item was specified in firstprivate and
-  // S lastprivate clauses on the combined or composite construct, [...]
-  //
-  // S If a list item of the linear clause is the iteration variable of a
-  // S simd or worksharing-loop SIMD construct and it is not declared in
-  // S the construct, the effect on the outer leaf constructs is as if the
-  // S list item was specified in a lastprivate clause on the combined or
-  // S composite construct [...]
-
-  // It's not clear how an object can be listed in a clause AND be the
-  // iteration variable of a construct in which is it declared. If an
-  // object is declared in the construct, then the declaration is located
-  // after the clause listing it.
-
+  // [5.2:341:15.2], [5.2:341:19]
+  auto hasSimd = findDirective(llvm::omp::Directive::OMPD_simd);
   std::optional<ObjectTy> iterVar = helper.getLoopIterVar();
   const auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(clause.t);
 
-  // Lists of objects that will be used to construct FIRSTPRIVATE and
-  // LASTPRIVATE clauses.
+  // Lists of objects that will be used to construct "firstprivate" and
+  // "lastprivate" clauses.
   tomp::ObjectListT<IdTy, ExprTy> first, last;
 
   for (const ObjectTy &object : objects) {
     last.push_back(object);
-    if (iterVar && object.id() != iterVar->id())
+    if (!hasSimd || !iterVar || object.id() != iterVar->id())
       first.push_back(object);
   }
 
@@ -931,6 +1083,13 @@ bool ConstructDecompositionT<C, H>::applyClause(
 }
 
 // NOWAIT
+// [5.2:308:11-13]
+// Directives: dispatch, do, for, interop, scope, sections, single, target,
+// target enter data, target exit data, target update, taskwait, workshare
+//
+// [5.2:341:23]
+// (23) The effect of the nowait clause is as if it is applied to the outermost
+// leaf construct that permits it.
 template <typename C, typename H>
 bool ConstructDecompositionT<C, H>::applyClause(
     const tomp::clause::NowaitT<TypeTy, IdTy, ExprTy> &clause,
@@ -949,7 +1108,7 @@ template <typename C, typename H> bool ConstructDecompositionT<C, H>::split() {
     addClauseSymsToMap(*node, node);
 
   // First we need to apply LINEAR, because it can generate additional
-  // FIRSTPRIVATE and LASTPRIVATE clauses that apply to the combined/
+  // "firstprivate" and "lastprivate" clauses that apply to the combined/
   // composite construct.
   // Collect them separately, because they may modify the clause list.
   llvm::SmallVector<const ClauseTy *> linears;
@@ -964,7 +1123,7 @@ template <typename C, typename H> bool ConstructDecompositionT<C, H>::split() {
                           node);
   }
 
-  // ALLOCATE clauses need to be applied last since they need to see
+  // "allocate" clauses need to be applied last since they need to see
   // which directives have data-privatizing clauses.
   auto skip = [](const ClauseTy *node) {
     switch (node->id) {
@@ -985,7 +1144,7 @@ template <typename C, typename H> bool ConstructDecompositionT<C, H>::split() {
         std::visit([&](auto &&s) { return applyClause(s, node); }, node->u);
   }
 
-  // Apply ALLOCATE.
+  // Apply "allocate".
   for (const ClauseTy *node : nodes) {
     if (node->id != llvm::omp::Clause::OMPC_allocate)
       continue;
diff --git a/llvm/unittests/Frontend/CMakeLists.txt b/llvm/unittests/Frontend/CMakeLists.txt
index 3f290b63ba6479..85e113816e3bc6 100644
--- a/llvm/unittests/Frontend/CMakeLists.txt
+++ b/llvm/unittests/Frontend/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_unittest(LLVMFrontendTests
   OpenMPIRBuilderTest.cpp
   OpenMPParsingTest.cpp
   OpenMPCompositionTest.cpp
+  OpenMPDecompositionTest.cpp
 
   DEPENDS
   acc_gen
diff --git a/llvm/unittests/Frontend/OpenMPDecompositionTest.cpp b/llvm/unittests/Frontend/OpenMPDecompositionTest.cpp
new file mode 100644
index 00000000000000..df48e9cc0ff4a4
--- /dev/null
+++ b/llvm/unittests/Frontend/OpenMPDecompositionTest.cpp
@@ -0,0 +1,999 @@
+//===- llvm/unittests/Frontend/OpenMPDecompositionTest.cpp ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Frontend/OpenMP/ClauseT.h"
+#include "llvm/Frontend/OpenMP/ConstructDecompositionT.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
+#include "gtest/gtest.h"
+
+#include <iterator>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+// The actual tests start at comment "--- Test" below.
+
+// Create simple instantiations of all clauses to allow manual construction
+// of clauses, and implement emitting of a directive with clauses to a string.
+//
+// The tests then follow the pattern
+// 1. Create a list of clauses.
+// 2. Pass them, together with a construct, to the decomposition class.
+// 3. Extract individual resulting leaf constructs with clauses applied
+//    to them.
+// 4. Convert them to strings and compare with expected outputs.
+
+namespace omp {
+struct TypeTy {}; // placeholder
+struct ExprTy {}; // placeholder
+using IdTy = std::string;
+} // namespace omp
+
+namespace tomp::type {
+template <> struct ObjectT<omp::IdTy, omp::ExprTy> {
+  const omp::IdTy &id() const { return name; }
+  const std::optional<omp::ExprTy> ref() const { return omp::ExprTy{}; }
+
+  omp::IdTy name;
+};
+} // namespace tomp::type
+
+namespace omp {
+template <typename ElemTy> using List = tomp::type::ListT<ElemTy>;
+
+using Object = tomp::ObjectT<IdTy, ExprTy>;
+
+namespace clause {
+using DefinedOperator = tomp::type::DefinedOperatorT<IdTy, ExprTy>;
+using ProcedureDesignator = tomp::type::ProcedureDesignatorT<IdTy, ExprTy>;
+using ReductionOperator = tomp::type::ReductionIdentifierT<IdTy, ExprTy>;
+
+using AcqRel = tomp::clause::AcqRelT<TypeTy, IdTy, ExprTy>;
+using Acquire = tomp::clause::AcquireT<TypeTy, IdTy, ExprTy>;
+using AdjustArgs = tomp::clause::AdjustArgsT<TypeTy, IdTy, ExprTy>;
+using Affinity = tomp::clause::AffinityT<TypeTy, IdTy, ExprTy>;
+using Aligned = tomp::clause::AlignedT<TypeTy, IdTy, ExprTy>;
+using Align = tomp::clause::AlignT<TypeTy, IdTy, ExprTy>;
+using Allocate = tomp::clause::AllocateT<TypeTy, IdTy, ExprTy>;
+using Allocator = tomp::clause::AllocatorT<TypeTy, IdTy, ExprTy>;
+using AppendArgs = tomp::clause::AppendArgsT<TypeTy, IdTy, ExprTy>;
+using AtomicDefaultMemOrder =
+    tomp::clause::AtomicDefaultMemOrderT<TypeTy, IdTy, ExprTy>;
+using At = tomp::clause::AtT<TypeTy, IdTy, ExprTy>;
+using Bind = tomp::clause::BindT<TypeTy, IdTy, ExprTy>;
+using Capture = tomp::clause::CaptureT<TypeTy, IdTy, ExprTy>;
+using Collapse = tomp::clause::CollapseT<TypeTy, IdTy, ExprTy>;
+using Compare = tomp::clause::CompareT<TypeTy, IdTy, ExprTy>;
+using Copyin = tomp::clause::CopyinT<TypeTy, IdTy, ExprTy>;
+using Copyprivate = tomp::clause::CopyprivateT<TypeTy, IdTy, ExprTy>;
+using Defaultmap = tomp::clause::DefaultmapT<TypeTy, IdTy, ExprTy>;
+using Default = tomp::clause::DefaultT<TypeTy, IdTy, ExprTy>;
+using Depend = tomp::clause::DependT<TypeTy, IdTy, ExprTy>;
+using Destroy = tomp::clause::DestroyT<TypeTy, IdTy, ExprTy>;
+using Detach = tomp::clause::DetachT<TypeTy, IdTy, ExprTy>;
+using Device = tomp::clause::DeviceT<TypeTy, IdTy, ExprTy>;
+using DeviceType = tomp::clause::DeviceTypeT<TypeTy, IdTy, ExprTy>;
+using DistSchedule = tomp::clause::DistScheduleT<TypeTy, IdTy, ExprTy>;
+using Doacross = tomp::clause::DoacrossT<TypeTy, IdTy, ExprTy>;
+using DynamicAllocators =
+    tomp::clause::DynamicAllocatorsT<TypeTy, IdTy, ExprTy>;
+using Enter = tomp::clause::EnterT<TypeTy, IdTy, ExprTy>;
+using Exclusive = tomp::clause::ExclusiveT<TypeTy, IdTy, ExprTy>;
+using Fail = tomp::clause::FailT<TypeTy, IdTy, ExprTy>;
+using Filter = tomp::clause::FilterT<TypeTy, IdTy, ExprTy>;
+using Final = tomp::clause::FinalT<TypeTy, IdTy, ExprTy>;
+using Firstprivate = tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy>;
+using From = tomp::clause::FromT<TypeTy, IdTy, ExprTy>;
+using Full = tomp::clause::FullT<TypeTy, IdTy, ExprTy>;
+using Grainsize = tomp::clause::GrainsizeT<TypeTy, IdTy, ExprTy>;
+using HasDeviceAddr = tomp::clause::HasDeviceAddrT<TypeTy, IdTy, ExprTy>;
+using Hint = tomp::clause::HintT<TypeTy, IdTy, ExprTy>;
+using If = tomp::clause::IfT<TypeTy, IdTy, ExprTy>;
+using Inbranch = tomp::clause::InbranchT<TypeTy, IdTy, ExprTy>;
+using Inclusive = tomp::clause::InclusiveT<TypeTy, IdTy, ExprTy>;
+using Indirect = tomp::clause::IndirectT<TypeTy, IdTy, ExprTy>;
+using Init = tomp::clause::InitT<TypeTy, IdTy, ExprTy>;
+using InReduction = tomp::clause::InReductionT<TypeTy, IdTy, ExprTy>;
+using IsDevicePtr = tomp::clause::IsDevicePtrT<TypeTy, IdTy, ExprTy>;
+using Lastprivate = tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy>;
+using Linear = tomp::clause::LinearT<TypeTy, IdTy, ExprTy>;
+using Link = tomp::clause::LinkT<TypeTy, IdTy, ExprTy>;
+using Map = tomp::clause::MapT<TypeTy, IdTy, ExprTy>;
+using Match = tomp::clause::MatchT<TypeTy, IdTy, ExprTy>;
+using Mergeable = tomp::clause::MergeableT<TypeTy, IdTy, ExprTy>;
+using Message = tomp::clause::MessageT<TypeTy, IdTy, ExprTy>;
+using Nocontext = tomp::clause::NocontextT<TypeTy, IdTy, ExprTy>;
+using Nogroup = tomp::clause::NogroupT<TypeTy, IdTy, ExprTy>;
+using Nontemporal = tomp::clause::NontemporalT<TypeTy, IdTy, ExprTy>;
+using Notinbranch = tomp::clause::NotinbranchT<TypeTy, IdTy, ExprTy>;
+using Novariants = tomp::clause::NovariantsT<TypeTy, IdTy, ExprTy>;
+using Nowait = tomp::clause::NowaitT<TypeTy, IdTy, ExprTy>;
+using NumTasks = tomp::clause::NumTasksT<TypeTy, IdTy, ExprTy>;
+using NumTeams = tomp::clause::NumTeamsT<TypeTy, IdTy, ExprTy>;
+using NumThreads = tomp::clause::NumThreadsT<TypeTy, IdTy, ExprTy>;
+using OmpxAttribute = tomp::clause::OmpxAttributeT<TypeTy, IdTy, ExprTy>;
+using OmpxBare = tomp::clause::OmpxBareT<TypeTy, IdTy, ExprTy>;
+using OmpxDynCgroupMem = tomp::clause::OmpxDynCgroupMemT<TypeTy, IdTy, ExprTy>;
+using Ordered = tomp::clause::OrderedT<TypeTy, IdTy, ExprTy>;
+using Order = tomp::clause::OrderT<TypeTy, IdTy, ExprTy>;
+using Partial = tomp::clause::PartialT<TypeTy, IdTy, ExprTy>;
+using Priority = tomp::clause::PriorityT<TypeTy, IdTy, ExprTy>;
+using Private = tomp::clause::PrivateT<TypeTy, IdTy, ExprTy>;
+using ProcBind = tomp::clause::ProcBindT<TypeTy, IdTy, ExprTy>;
+using Read = tomp::clause::ReadT<TypeTy, IdTy, ExprTy>;
+using Reduction = tomp::clause::ReductionT<TypeTy, IdTy, ExprTy>;
+using Relaxed = tomp::clause::RelaxedT<TypeTy, IdTy, ExprTy>;
+using Release = tomp::clause::ReleaseT<TypeTy, IdTy, ExprTy>;
+using ReverseOffload = tomp::clause::ReverseOffloadT<TypeTy, IdTy, ExprTy>;
+using Safelen = tomp::clause::SafelenT<TypeTy, IdTy, ExprTy>;
+using Schedule = tomp::clause::ScheduleT<TypeTy, IdTy, ExprTy>;
+using SeqCst = tomp::clause::SeqCstT<TypeTy, IdTy, ExprTy>;
+using Severity = tomp::clause::SeverityT<TypeTy, IdTy, ExprTy>;
+using Shared = tomp::clause::SharedT<TypeTy, IdTy, ExprTy>;
+using Simdlen = tomp::clause::SimdlenT<TypeTy, IdTy, ExprTy>;
+using Simd = tomp::clause::SimdT<TypeTy, IdTy, ExprTy>;
+using Sizes = tomp::clause::SizesT<TypeTy, IdTy, ExprTy>;
+using TaskReduction = tomp::clause::TaskReductionT<TypeTy, IdTy, ExprTy>;
+using ThreadLimit = tomp::clause::ThreadLimitT<TypeTy, IdTy, ExprTy>;
+using Threads = tomp::clause::ThreadsT<TypeTy, IdTy, ExprTy>;
+using To = tomp::clause::ToT<TypeTy, IdTy, ExprTy>;
+using UnifiedAddress = tomp::clause::UnifiedAddressT<TypeTy, IdTy, ExprTy>;
+using UnifiedSharedMemory =
+    tomp::clause::UnifiedSharedMemoryT<TypeTy, IdTy, ExprTy>;
+using Uniform = tomp::clause::UniformT<TypeTy, IdTy, ExprTy>;
+using Unknown = tomp::clause::UnknownT<TypeTy, IdTy, ExprTy>;
+using Untied = tomp::clause::UntiedT<TypeTy, IdTy, ExprTy>;
+using Update = tomp::clause::UpdateT<TypeTy, IdTy, ExprTy>;
+using UseDeviceAddr = tomp::clause::UseDeviceAddrT<TypeTy, IdTy, ExprTy>;
+using UseDevicePtr = tomp::clause::UseDevicePtrT<TypeTy, IdTy, ExprTy>;
+using UsesAllocators = tomp::clause::UsesAllocatorsT<TypeTy, IdTy, ExprTy>;
+using Use = tomp::clause::UseT<TypeTy, IdTy, ExprTy>;
+using Weak = tomp::clause::WeakT<TypeTy, IdTy, ExprTy>;
+using When = tomp::clause::WhenT<TypeTy, IdTy, ExprTy>;
+using Write = tomp::clause::WriteT<TypeTy, IdTy, ExprTy>;
+} // namespace clause
+
+struct Helper {
+  std::optional<Object> getBaseObject(const Object &object) {
+    return std::nullopt;
+  }
+  std::optional<Object> getLoopIterVar() { return std::nullopt; }
+};
+
+using Clause = tomp::ClauseT<TypeTy, IdTy, ExprTy>;
+using ConstructDecomposition = tomp::ConstructDecompositionT<Clause, Helper>;
+using DirectiveWithClauses = tomp::DirectiveWithClauses<Clause>;
+} // namespace omp
+
+struct StringifyClause {
+  static std::string join(const omp::List<std::string> &Strings) {
+    std::stringstream Stream;
+    for (const auto &[Index, String] : llvm::enumerate(Strings)) {
+      if (Index != 0)
+        Stream << ", ";
+      Stream << String;
+    }
+    return Stream.str();
+  }
+
+  static std::string to_str(llvm::omp::Directive D) {
+    return getOpenMPDirectiveName(D).str();
+  }
+  static std::string to_str(llvm::omp::Clause C) {
+    return getOpenMPClauseName(C).str();
+  }
+  static std::string to_str(const omp::TypeTy &Type) { return "type"; }
+  static std::string to_str(const omp::ExprTy &Expr) { return "expr"; }
+  static std::string to_str(const omp::Object &Obj) { return Obj.id(); }
+
+  template <typename U>
+  static std::enable_if_t<std::is_enum_v<llvm::remove_cvref_t<U>>, std::string>
+  to_str(U &&Item) {
+    return std::to_string(llvm::to_underlying(Item));
+  }
+
+  template <typename U> static std::string to_str(const omp::List<U> &Items) {
+    omp::List<std::string> Names;
+    llvm::transform(Items, std::back_inserter(Names),
+                    [](auto &&S) { return to_str(S); });
+    return "(" + join(Names) + ")";
+  }
+
+  template <typename U>
+  static std::string to_str(const std::optional<U> &Item) {
+    if (Item)
+      return to_str(*Item);
+    return "";
+  }
+
+  template <typename... Us, size_t... Is>
+  static std::string to_str(const std::tuple<Us...> &Tuple,
+                            std::index_sequence<Is...>) {
+    omp::List<std::string> Strings;
+    (Strings.push_back(to_str(std::get<Is>(Tuple))), ...);
+    return "(" + join(Strings) + ")";
+  }
+
+  template <typename U>
+  static std::enable_if_t<llvm::remove_cvref_t<U>::EmptyTrait::value,
+                          std::string>
+  to_str(U &&Item) {
+    return "";
+  }
+
+  template <typename U>
+  static std::enable_if_t<llvm::remove_cvref_t<U>::IncompleteTrait::value,
+                          std::string>
+  to_str(U &&Item) {
+    return "";
+  }
+
+  template <typename U>
+  static std::enable_if_t<llvm::remove_cvref_t<U>::WrapperTrait::value,
+                          std::string>
+  to_str(U &&Item) {
+    // For a wrapper, stringify the wrappee, and only add parentheses if
+    // there aren't any already.
+    std::string Str = to_str(Item.v);
+    if (!Str.empty()) {
+      if (Str.front() == '(' && Str.back() == ')')
+        return Str;
+    }
+    return "(" + to_str(Item.v) + ")";
+  }
+
+  template <typename U>
+  static std::enable_if_t<llvm::remove_cvref_t<U>::TupleTrait::value,
+                          std::string>
+  to_str(U &&Item) {
+    constexpr size_t TupleSize =
+        std::tuple_size_v<llvm::remove_cvref_t<decltype(Item.t)>>;
+    return to_str(Item.t, std::make_index_sequence<TupleSize>{});
+  }
+
+  template <typename U>
+  static std::enable_if_t<llvm::remove_cvref_t<U>::UnionTrait::value,
+                          std::string>
+  to_str(U &&Item) {
+    return std::visit([](auto &&S) { return to_str(S); }, Item.u);
+  }
+
+  StringifyClause(const omp::Clause &C)
+      // Rely on content stringification to emit enclosing parentheses.
+      : Str(to_str(C.id) + to_str(C)) {}
+
+  std::string Str;
+};
+
+std::string stringify(const omp::DirectiveWithClauses &DWC) {
+  std::stringstream Stream;
+
+  Stream << getOpenMPDirectiveName(DWC.id).str();
+  for (const omp::Clause &C : DWC.clauses)
+    Stream << ' ' << StringifyClause(C).Str;
+
+  return Stream.str();
+}
+
+// --- Tests ----------------------------------------------------------
+
+namespace {
+using namespace llvm::omp;
+
+class OpenMPDecompositionTest : public testing::Test {
+protected:
+  void SetUp() override {}
+  void TearDown() override {}
+
+  omp::Helper Helper;
+  uint32_t AnyVersion = 999;
+};
+
+// PRIVATE
+// [5.2:111:5-7]
+// Directives: distribute, do, for, loop, parallel, scope, sections, simd,
+// single, target, task, taskloop, teams
+//
+// [5.2:340:1-2]
+// (1) The effect of the 1 private clause is as if it is applied only to the
+// innermost leaf construct that permits it.
+TEST_F(OpenMPDecompositionTest, Private1) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_private, omp::clause::Private{{x}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_parallel_sections,
+                                  Clauses);
+  ASSERT_EQ(Dec.output.size(), 2u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  ASSERT_EQ(Dir0, "parallel");            // (1)
+  ASSERT_EQ(Dir1, "sections private(x)"); // (1)
+}
+
+TEST_F(OpenMPDecompositionTest, Private2) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_private, omp::clause::Private{{x}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_parallel_masked,
+                                  Clauses);
+  ASSERT_EQ(Dec.output.size(), 2u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  ASSERT_EQ(Dir0, "parallel private(x)"); // (1)
+  ASSERT_EQ(Dir1, "masked");              // (1)
+}
+
+// FIRSTPRIVATE
+// [5.2:112:5-7]
+// Directives: distribute, do, for, parallel, scope, sections, single, target,
+// task, taskloop, teams
+//
+// [5.2:340:3-20]
+// (3) The effect of the firstprivate clause is as if it is applied to one or
+// more leaf constructs as follows:
+//  (5) To the distribute construct if it is among the constituent constructs;
+//  (6) To the teams construct if it is among the constituent constructs and the
+//      distribute construct is not;
+//  (8) To a worksharing construct that accepts the clause if one is among the
+//      constituent constructs;
+//  (9) To the taskloop construct if it is among the constituent constructs;
+// (10) To the parallel construct if it is among the constituent constructs and
+//      neither a taskloop construct nor a worksharing construct that accepts
+//      the clause is among them;
+// (12) To the target construct if it is among the constituent constructs and
+//      the same list item neither appears in a lastprivate clause nor is the
+//      base variable or base pointer of a list item that appears in a map
+//      clause.
+//
+// (15) If the parallel construct is among the constituent constructs and the
+// effect is not as if the firstprivate clause is applied to it by the above
+// rules, then the effect is as if the shared clause with the same list item is
+// applied to the parallel construct.
+// (17) If the teams construct is among the constituent constructs and the
+// effect is not as if the firstprivate clause is applied to it by the above
+// rules, then the effect is as if the shared clause with the same list item is
+// applied to the teams construct.
+TEST_F(OpenMPDecompositionTest, Firstprivate1) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_firstprivate, omp::clause::Firstprivate{{x}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_parallel_sections,
+                                  Clauses);
+  ASSERT_EQ(Dec.output.size(), 2u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  ASSERT_EQ(Dir0, "parallel shared(x)");       // (10), (15)
+  ASSERT_EQ(Dir1, "sections firstprivate(x)"); // (8)
+}
+
+TEST_F(OpenMPDecompositionTest, Firstprivate2) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_firstprivate, omp::clause::Firstprivate{{x}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper,
+                                  OMPD_target_teams_distribute, Clauses);
+  ASSERT_EQ(Dec.output.size(), 3u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  ASSERT_EQ(Dir0, "target firstprivate(x)");     // (12)
+  ASSERT_EQ(Dir1, "teams shared(x)");            // (6), (17)
+  ASSERT_EQ(Dir2, "distribute firstprivate(x)"); // (5)
+}
+
+TEST_F(OpenMPDecompositionTest, Firstprivate3) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_firstprivate, omp::clause::Firstprivate{{x}}},
+      {OMPC_lastprivate, omp::clause::Lastprivate{{std::nullopt, {x}}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper,
+                                  OMPD_target_teams_distribute, Clauses);
+  ASSERT_EQ(Dec.output.size(), 3u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  ASSERT_EQ(Dir0, "target map(2, , , , (x))"); // (12), (27)
+  ASSERT_EQ(Dir1, "teams shared(x)");          // (6), (17)
+  ASSERT_EQ(Dir2, "distribute firstprivate(x) lastprivate(, (x))"); // (5), (21)
+}
+
+TEST_F(OpenMPDecompositionTest, Firstprivate4) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_firstprivate, omp::clause::Firstprivate{{x}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_target_teams,
+                                  Clauses);
+  ASSERT_EQ(Dec.output.size(), 2u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  ASSERT_EQ(Dir0, "target firstprivate(x)"); // (12)
+  ASSERT_EQ(Dir1, "teams firstprivate(x)");  // (6)
+}
+
+TEST_F(OpenMPDecompositionTest, Firstprivate5) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_firstprivate, omp::clause::Firstprivate{{x}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper,
+                                  OMPD_parallel_masked_taskloop, Clauses);
+  ASSERT_EQ(Dec.output.size(), 3u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  ASSERT_EQ(Dir0, "parallel shared(x)"); // (10)
+  ASSERT_EQ(Dir1, "masked");
+  ASSERT_EQ(Dir2, "taskloop firstprivate(x)"); // (9)
+}
+
+TEST_F(OpenMPDecompositionTest, Firstprivate6) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_firstprivate, omp::clause::Firstprivate{{x}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_parallel_masked,
+                                  Clauses);
+  ASSERT_EQ(Dec.output.size(), 2u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  ASSERT_EQ(Dir0, "parallel firstprivate(x)"); // (10)
+  ASSERT_EQ(Dir1, "masked");
+}
+
+TEST_F(OpenMPDecompositionTest, Firstprivate7) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_firstprivate, omp::clause::Firstprivate{{x}}},
+  };
+
+  // Composite constructs are still decomposed.
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_teams_distribute,
+                                  Clauses);
+  ASSERT_EQ(Dec.output.size(), 2u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  ASSERT_EQ(Dir0, "teams shared(x)");            // (17)
+  ASSERT_EQ(Dir1, "distribute firstprivate(x)"); // (5)
+}
+
+// LASTPRIVATE
+// [5.2:115:7-8]
+// Directives: distribute, do, for, loop, sections, simd, taskloop
+//
+// [5.2:340:21-30]
+// (21) The effect of the lastprivate clause is as if it is applied to all leaf
+// constructs that permit the clause.
+// (22) If the parallel construct is among the constituent constructs and the
+// list item is not also specified in the firstprivate clause, then the effect
+// of the lastprivate clause is as if the shared clause with the same list item
+// is applied to the parallel construct.
+// (24) If the teams construct is among the constituent constructs and the list
+// item is not also specified in the firstprivate clause, then the effect of the
+// lastprivate clause is as if the shared clause with the same list item is
+// applied to the teams construct.
+// (27) If the target construct is among the constituent constructs and the list
+// item is not the base variable or base pointer of a list item that appears in
+// a map clause, the effect of the lastprivate clause is as if the same list
+// item appears in a map clause with a map-type of tofrom.
+TEST_F(OpenMPDecompositionTest, Lastprivate1) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_lastprivate, omp::clause::Lastprivate{{std::nullopt, {x}}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_parallel_sections,
+                                  Clauses);
+  ASSERT_EQ(Dec.output.size(), 2u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  ASSERT_EQ(Dir0, "parallel shared(x)");          // (21), (22)
+  ASSERT_EQ(Dir1, "sections lastprivate(, (x))"); // (21)
+}
+
+TEST_F(OpenMPDecompositionTest, Lastprivate2) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_lastprivate, omp::clause::Lastprivate{{std::nullopt, {x}}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_teams_distribute,
+                                  Clauses);
+  ASSERT_EQ(Dec.output.size(), 2u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  ASSERT_EQ(Dir0, "teams shared(x)");               // (21), (25)
+  ASSERT_EQ(Dir1, "distribute lastprivate(, (x))"); // (21)
+}
+
+TEST_F(OpenMPDecompositionTest, Lastprivate3) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_lastprivate, omp::clause::Lastprivate{{std::nullopt, {x}}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_target_parallel_do,
+                                  Clauses);
+  ASSERT_EQ(Dec.output.size(), 3u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  ASSERT_EQ(Dir0, "target map(2, , , , (x))"); // (21), (27)
+  ASSERT_EQ(Dir1, "parallel shared(x)");       // (22)
+  ASSERT_EQ(Dir2, "do lastprivate(, (x))");    // (21)
+}
+
+// SHARED
+// [5.2:110:5-6]
+// Directives: parallel, task, taskloop, teams
+//
+// [5.2:340:31-32]
+// (31) The effect of the shared, default, thread_limit, or order clause is as
+// if it is applied to all leaf constructs that permit the clause.
+TEST_F(OpenMPDecompositionTest, Shared1) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_shared, omp::clause::Shared{{x}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper,
+                                  OMPD_parallel_masked_taskloop, Clauses);
+  ASSERT_EQ(Dec.output.size(), 3u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  ASSERT_EQ(Dir0, "parallel shared(x)"); // (31)
+  ASSERT_EQ(Dir1, "masked");             // (31)
+  ASSERT_EQ(Dir2, "taskloop shared(x)"); // (31)
+}
+
+// DEFAULT
+// [5.2:109:5-6]
+// Directives: parallel, task, taskloop, teams
+//
+// [5.2:340:31-32]
+// (31) The effect of the shared, default, thread_limit, or order clause is as
+// if it is applied to all leaf constructs that permit the clause.
+TEST_F(OpenMPDecompositionTest, Default1) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_default,
+       omp::clause::Default{
+           omp::clause::Default::DataSharingAttribute::Firstprivate}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper,
+                                  OMPD_parallel_masked_taskloop, Clauses);
+  ASSERT_EQ(Dec.output.size(), 3u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  ASSERT_EQ(Dir0, "parallel default(0)"); // (31)
+  ASSERT_EQ(Dir1, "masked");              // (31)
+  ASSERT_EQ(Dir2, "taskloop default(0)"); // (31)
+}
+
+// THREAD_LIMIT
+// [5.2:277:14-15]
+// Directives: target, teams
+//
+// [5.2:340:31-32]
+// (31) The effect of the shared, default, thread_limit, or order clause is as
+// if it is applied to all leaf constructs that permit the clause.
+TEST_F(OpenMPDecompositionTest, ThreadLimit1) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_thread_limit, omp::clause::ThreadLimit{omp::ExprTy{}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper,
+                                  OMPD_target_teams_distribute, Clauses);
+  ASSERT_EQ(Dec.output.size(), 3u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  ASSERT_EQ(Dir0, "target thread_limit(expr)"); // (31)
+  ASSERT_EQ(Dir1, "teams thread_limit(expr)");  // (31)
+  ASSERT_EQ(Dir2, "distribute");                // (31)
+}
+
+// ORDER
+// [5.2:234:3-4]
+// Directives: distribute, do, for, loop, simd
+//
+// [5.2:340:31-32]
+// (31) The effect of the shared, default, thread_limit, or order clause is as
+// if it is applied to all leaf constructs that permit the clause.
+TEST_F(OpenMPDecompositionTest, Order1) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_order,
+       omp::clause::Order{{omp::clause::Order::OrderModifier::Unconstrained,
+                           omp::clause::Order::Ordering::Concurrent}}},
+  };
+
+  omp::ConstructDecomposition Dec(
+      AnyVersion, Helper, OMPD_target_teams_distribute_parallel_for_simd,
+      Clauses);
+  ASSERT_EQ(Dec.output.size(), 6u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  std::string Dir3 = stringify(Dec.output[3]);
+  std::string Dir4 = stringify(Dec.output[4]);
+  std::string Dir5 = stringify(Dec.output[5]);
+  ASSERT_EQ(Dir0, "target"); // (31)
+  ASSERT_EQ(Dir1, "teams");  // (31)
+  // XXX OMP.td doesn't list "order" as allowed for "distribute"
+  ASSERT_EQ(Dir2, "distribute");       // (31)
+  ASSERT_EQ(Dir3, "parallel");         // (31)
+  ASSERT_EQ(Dir4, "for order(1, 0)");  // (31)
+  ASSERT_EQ(Dir5, "simd order(1, 0)"); // (31)
+}
+
+// ALLOCATE
+// [5.2:178:7-9]
+// Directives: allocators, distribute, do, for, parallel, scope, sections,
+// single, target, task, taskgroup, taskloop, teams
+//
+// [5.2:340:33-35]
+// (33) The effect of the allocate clause is as if it is applied to all leaf
+// constructs that permit the clause and to which a data-sharing attribute
+// clause that may create a private copy of the same list item is applied.
+TEST_F(OpenMPDecompositionTest, Allocate1) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_allocate,
+       omp::clause::Allocate{{std::nullopt, std::nullopt, std::nullopt, {x}}}},
+      {OMPC_private, omp::clause::Private{{x}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_parallel_sections,
+                                  Clauses);
+  ASSERT_EQ(Dec.output.size(), 2u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  ASSERT_EQ(Dir0, "parallel");                                // (33)
+  ASSERT_EQ(Dir1, "sections private(x) allocate(, , , (x))"); // (33)
+}
+
+// REDUCTION
+// [5.2:134:17-18]
+// Directives: do, for, loop, parallel, scope, sections, simd, taskloop, teams
+//
+// [5.2:340-341:36-13]
+// (36) The effect of the reduction clause is as if it is applied to all leaf
+// constructs that permit the clause, except for the following constructs:
+//  (1) The parallel construct, when combined with the sections,
+//      worksharing-loop, loop, or taskloop construct; and
+//  (3) The teams construct, when combined with the loop construct.
+// (4) For the parallel and teams constructs above, the effect of the reduction
+// clause instead is as if each list item or, for any list item that is an array
+// item, its corresponding base array or base pointer appears in a shared clause
+// for the construct.
+// (6) If the task reduction-modifier is specified, the effect is as if it only
+// modifies the behavior of the reduction clause on the innermost leaf construct
+// that accepts the modifier (see Section 5.5.8).
+// (8) If the inscan reduction-modifier is specified, the effect is as if it
+// modifies the behavior of the reduction clause on all constructs of the
+// combined construct to which the clause is applied and that accept the
+// modifier.
+// (10) If a list item in a reduction clause on a combined target construct does
+// not have the same base variable or base pointer as a list item in a map
+// clause on the construct, then the effect is as if the list item in the
+// reduction clause appears as a list item in a map clause with a map-type of
+// tofrom.
+namespace red {
+// Make is easier to construct reduction operators from built-in intrinsics.
+omp::clause::ReductionOperator
+makeOp(omp::clause::DefinedOperator::IntrinsicOperator Op) {
+  return omp::clause::ReductionOperator{omp::clause::DefinedOperator{Op}};
+}
+} // namespace red
+
+TEST_F(OpenMPDecompositionTest, Reduction1) {
+  omp::Object x{"x"};
+  auto Add = red::makeOp(omp::clause::DefinedOperator::IntrinsicOperator::Add);
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_reduction, omp::clause::Reduction{{std::nullopt, {Add}, {x}}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_parallel_sections,
+                                  Clauses);
+  ASSERT_EQ(Dec.output.size(), 2u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  ASSERT_EQ(Dir0, "parallel shared(x)");             // (36), (1), (4)
+  ASSERT_EQ(Dir1, "sections reduction(, (3), (x))"); // (36)
+}
+
+TEST_F(OpenMPDecompositionTest, Reduction2) {
+  omp::Object x{"x"};
+  auto Add = red::makeOp(omp::clause::DefinedOperator::IntrinsicOperator::Add);
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_reduction, omp::clause::Reduction{{std::nullopt, {Add}, {x}}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_parallel_masked,
+                                  Clauses);
+  ASSERT_EQ(Dec.output.size(), 2u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  ASSERT_EQ(Dir0, "parallel reduction(, (3), (x))"); // (36), (1), (4)
+  ASSERT_EQ(Dir1, "masked");                         // (36)
+}
+
+TEST_F(OpenMPDecompositionTest, Reduction3) {
+  omp::Object x{"x"};
+  auto Add = red::makeOp(omp::clause::DefinedOperator::IntrinsicOperator::Add);
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_reduction, omp::clause::Reduction{{std::nullopt, {Add}, {x}}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_teams_loop, Clauses);
+  ASSERT_EQ(Dec.output.size(), 2u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  ASSERT_EQ(Dir0, "teams shared(x)");            // (36), (3), (4)
+  ASSERT_EQ(Dir1, "loop reduction(, (3), (x))"); // (36)
+}
+
+TEST_F(OpenMPDecompositionTest, Reduction4) {
+  omp::Object x{"x"};
+  auto Add = red::makeOp(omp::clause::DefinedOperator::IntrinsicOperator::Add);
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_reduction, omp::clause::Reduction{{std::nullopt, {Add}, {x}}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper,
+                                  OMPD_teams_distribute_parallel_for, Clauses);
+  ASSERT_EQ(Dec.output.size(), 4u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  std::string Dir3 = stringify(Dec.output[3]);
+  ASSERT_EQ(Dir0, "teams reduction(, (3), (x))"); // (36), (3)
+  ASSERT_EQ(Dir1, "distribute");                  // (36)
+  ASSERT_EQ(Dir2, "parallel shared(x)");          // (36), (1), (4)
+  ASSERT_EQ(Dir3, "for reduction(, (3), (x))");   // (36)
+}
+
+TEST_F(OpenMPDecompositionTest, Reduction5) {
+  omp::Object x{"x"};
+  auto Add = red::makeOp(omp::clause::DefinedOperator::IntrinsicOperator::Add);
+  auto TaskMod = omp::clause::Reduction::ReductionModifier::Task;
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_reduction, omp::clause::Reduction{{TaskMod, {Add}, {x}}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper,
+                                  OMPD_teams_distribute_parallel_for, Clauses);
+  ASSERT_EQ(Dec.output.size(), 4u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  std::string Dir3 = stringify(Dec.output[3]);
+  ASSERT_EQ(Dir0, "teams reduction(, (3), (x))"); // (36), (3), (6)
+  ASSERT_EQ(Dir1, "distribute");                  // (36)
+  ASSERT_EQ(Dir2, "parallel shared(x)");          // (36), (1), (4)
+  ASSERT_EQ(Dir3, "for reduction(2, (3), (x))");  // (36), (6)
+}
+
+TEST_F(OpenMPDecompositionTest, Reduction6) {
+  omp::Object x{"x"};
+  auto Add = red::makeOp(omp::clause::DefinedOperator::IntrinsicOperator::Add);
+  auto InscanMod = omp::clause::Reduction::ReductionModifier::Inscan;
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_reduction, omp::clause::Reduction{{InscanMod, {Add}, {x}}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper,
+                                  OMPD_teams_distribute_parallel_for, Clauses);
+  ASSERT_EQ(Dec.output.size(), 4u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  std::string Dir3 = stringify(Dec.output[3]);
+  ASSERT_EQ(Dir0, "teams reduction(, (3), (x))"); // (36), (3), (8)
+  ASSERT_EQ(Dir1, "distribute");                  // (36)
+  ASSERT_EQ(Dir2, "parallel shared(x)");          // (36), (1), (4)
+  ASSERT_EQ(Dir3, "for reduction(1, (3), (x))");  // (36), (8)
+}
+
+TEST_F(OpenMPDecompositionTest, Reduction7) {
+  omp::Object x{"x"};
+  auto Add = red::makeOp(omp::clause::DefinedOperator::IntrinsicOperator::Add);
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_reduction, omp::clause::Reduction{{std::nullopt, {Add}, {x}}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_target_parallel_do,
+                                  Clauses);
+  ASSERT_EQ(Dec.output.size(), 3u);
+
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  // XXX Currently OMP.td allows "reduction" on "target".
+  ASSERT_EQ(Dir0,
+            "target reduction(, (3), (x)) map(2, , , , (x))"); // (36), (10)
+  ASSERT_EQ(Dir1, "parallel shared(x)");                       // (36), (1), (4)
+  ASSERT_EQ(Dir2, "do reduction(, (3), (x))");                 // (36)
+}
+
+// IF
+// [5.2:72:7-9]
+// Directives: cancel, parallel, simd, target, target data, target enter data,
+// target exit data, target update, task, taskloop
+//
+// [5.2:72:15-18]
+// (15) For combined or composite constructs, the if clause only applies to the
+// semantics of the construct named in the directive-name-modifier.
+// (16) For a combined or composite construct, if no directive-name-modifier is
+// specified then the if clause applies to all constituent constructs to which
+// an if clause can apply.
+TEST_F(OpenMPDecompositionTest, If1) {
+  omp::List<omp::Clause> Clauses{
+      {OMPC_if,
+       omp::clause::If{{llvm::omp::Directive::OMPD_parallel, omp::ExprTy{}}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper,
+                                  OMPD_target_parallel_for_simd, Clauses);
+  ASSERT_EQ(Dec.output.size(), 4u);
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  std::string Dir3 = stringify(Dec.output[3]);
+  ASSERT_EQ(Dir0, "target");              // (15)
+  ASSERT_EQ(Dir1, "parallel if(, expr)"); // (15)
+  ASSERT_EQ(Dir2, "for");                 // (15)
+  ASSERT_EQ(Dir3, "simd");                // (15)
+}
+
+TEST_F(OpenMPDecompositionTest, If2) {
+  omp::List<omp::Clause> Clauses{
+      {OMPC_if, omp::clause::If{{std::nullopt, omp::ExprTy{}}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper,
+                                  OMPD_target_parallel_for_simd, Clauses);
+  ASSERT_EQ(Dec.output.size(), 4u);
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  std::string Dir3 = stringify(Dec.output[3]);
+  ASSERT_EQ(Dir0, "target if(, expr)");   // (16)
+  ASSERT_EQ(Dir1, "parallel if(, expr)"); // (16)
+  ASSERT_EQ(Dir2, "for");                 // (16)
+  ASSERT_EQ(Dir3, "simd if(, expr)");     // (16)
+}
+
+// LINEAR
+// [5.2:118:1-2]
+// Directives: declare simd, do, for, simd
+//
+// [5.2:341:15-22]
+// (15.1) The effect of the linear clause is as if it is applied to the
+// innermost leaf construct.
+// (15.2) Additionally, if the list item is not the iteration variable of a simd
+// or worksharing-loop SIMD construct, the effect on the outer leaf constructs
+// is as if the list item was specified in firstprivate and lastprivate clauses
+// on the combined or composite construct, with the rules specified above
+// applied.
+// (19) If a list item of the linear clause is the iteration variable of a simd
+// or worksharing-loop SIMD construct and it is not declared in the construct,
+// the effect on the outer leaf constructs is as if the list item was specified
+// in a lastprivate clause on the combined or composite construct with the rules
+// specified above applied.
+TEST_F(OpenMPDecompositionTest, Linear1) {
+  omp::Object x{"x"};
+
+  omp::List<omp::Clause> Clauses{
+      {OMPC_linear,
+       omp::clause::Linear{{std::nullopt, std::nullopt, std::nullopt, {x}}}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_for_simd, Clauses);
+  ASSERT_EQ(Dec.output.size(), 2u);
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  ASSERT_EQ(Dir0, "for firstprivate(x) lastprivate(, (x))"); // (15.1), (15.2)
+  ASSERT_EQ(Dir1, "simd linear(, , , (x)) lastprivate(, (x))"); // (15.1)
+}
+
+// NOWAIT
+// [5.2:308:11-13]
+// Directives: dispatch, do, for, interop, scope, sections, single, target,
+// target enter data, target exit data, target update, taskwait, workshare
+//
+// [5.2:341:23]
+// (23) The effect of the nowait clause is as if it is applied to the outermost
+// leaf construct that permits it.
+TEST_F(OpenMPDecompositionTest, Nowait1) {
+  omp::List<omp::Clause> Clauses{
+      {OMPC_nowait, omp::clause::Nowait{}},
+  };
+
+  omp::ConstructDecomposition Dec(AnyVersion, Helper, OMPD_target_parallel_for,
+                                  Clauses);
+  ASSERT_EQ(Dec.output.size(), 3u);
+  std::string Dir0 = stringify(Dec.output[0]);
+  std::string Dir1 = stringify(Dec.output[1]);
+  std::string Dir2 = stringify(Dec.output[2]);
+  ASSERT_EQ(Dir0, "target nowait"); // (23)
+  ASSERT_EQ(Dir1, "parallel");      // (23)
+  ASSERT_EQ(Dir2, "for");           // (23)
+}
+} // namespace

>From 3748f49357ddd9c9fa9c2ee51c7371154c8fb79a Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Mon, 6 May 2024 20:22:00 -0500
Subject: [PATCH 10/15] clang-format

---
 flang/lib/Lower/OpenMP/Clauses.cpp | 5 ++---
 flang/lib/Lower/OpenMP/Clauses.h   | 3 ++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index 7257f9dee77dd5..87370c92964a5c 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -1234,9 +1234,8 @@ bool transferLocations(const List<Clause> &from, List<Clause> &to) {
   for (Clause &clause : to) {
     if (!clause.source.empty())
       continue;
-    auto found = llvm::find_if(from, [&](const Clause &c) {
-      return c.id == clause.id;
-    });
+    auto found =
+        llvm::find_if(from, [&](const Clause &c) { return c.id == clause.id; });
     // This is not completely accurate, but should be good enough for now.
     // It can be improved in the future if necessary, but in cases of
     // synthesized clauses getting accurate location may be impossible.
diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h
index 61c8fd2d1e5946..407579319279e0 100644
--- a/flang/lib/Lower/OpenMP/Clauses.h
+++ b/flang/lib/Lower/OpenMP/Clauses.h
@@ -35,7 +35,8 @@ struct TypeTy : public evaluate::SomeType {
 using IdTy = semantics::Symbol *;
 using ExprTy = SomeExpr;
 
-template <typename T> using List = tomp::ListT<T>;
+template <typename T>
+using List = tomp::ListT<T>;
 } // namespace Fortran::lower::omp
 
 namespace tomp::type {

>From 54b637dfa83c36b836702733712e0d3f8e1fb449 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 7 May 2024 08:31:32 -0500
Subject: [PATCH 11/15] Address review comments

---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    |  1 -
 flang/lib/Lower/OpenMP/Decomposer.cpp         |  2 +-
 .../Frontend/OpenMP/ConstructDecompositionT.h | 94 +++++++++----------
 3 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 5af17f232707f0..bb83e8d5e78889 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -520,7 +520,6 @@ bool ClauseProcessor::processCopyin() const {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint();
   firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
-
   auto checkAndCopyHostAssociateVar =
       [&](Fortran::semantics::Symbol *sym,
           mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr) {
diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp
index 5305955de309b5..e6897cb81e9474 100644
--- a/flang/lib/Lower/OpenMP/Decomposer.cpp
+++ b/flang/lib/Lower/OpenMP/Decomposer.cpp
@@ -93,7 +93,7 @@ ConstructQueue buildConstructQueue(
   List<UnitConstruct> constructs;
 
   ConstructDecomposition decompose(modOp, semaCtx, eval, compound, clauses);
-  assert(!decompose.output.empty());
+  assert(!decompose.output.empty() && "Construct decomposition failed");
 
   llvm::SmallVector<llvm::omp::Directive> loweringUnits;
   std::ignore =
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
index c8e7eb4b18b57e..37c88f0fa07b47 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -92,10 +92,10 @@ struct ConstructDecompositionT {
 
   using ClauseSet = std::unordered_set<const ClauseTy *>;
 
-  ConstructDecompositionT(uint32_t ver, HelperType &hlp,
+  ConstructDecompositionT(uint32_t ver, HelperType &helper,
                           llvm::omp::Directive dir,
                           llvm::ArrayRef<ClauseTy> clauses)
-      : version(ver), construct(dir), helper(hlp) {
+      : version(ver), construct(dir), helper(helper) {
     for (const ClauseTy &clause : clauses)
       nodes.push_back(&clause);
 
@@ -532,20 +532,20 @@ bool ConstructDecompositionT<C, H>::applyClause(
   bool applied = false;
 
   // [5.2:340:3-6]
-  auto hasDistribute = findDirective(llvm::omp::OMPD_distribute);
-  auto hasTeams = findDirective(llvm::omp::OMPD_teams);
-  if (hasDistribute != nullptr) {
-    hasDistribute->clauses.push_back(node);
+  auto dirDistribute = findDirective(llvm::omp::OMPD_distribute);
+  auto dirTeams = findDirective(llvm::omp::OMPD_teams);
+  if (dirDistribute != nullptr) {
+    dirDistribute->clauses.push_back(node);
     applied = true;
     // [5.2:340:17]
-    if (hasTeams != nullptr) {
+    if (dirTeams != nullptr) {
       auto *shared = makeClause(
           llvm::omp::Clause::OMPC_shared,
           tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/clause.v});
-      hasTeams->clauses.push_back(shared);
+      dirTeams->clauses.push_back(shared);
     }
-  } else if (hasTeams != nullptr) {
-    hasTeams->clauses.push_back(node);
+  } else if (dirTeams != nullptr) {
+    dirTeams->clauses.push_back(node);
     applied = true;
   }
 
@@ -560,31 +560,31 @@ bool ConstructDecompositionT<C, H>::applyClause(
     return static_cast<typename decltype(leafs)::value_type *>(nullptr);
   };
 
-  auto hasWorksharing = findWorksharing();
-  if (hasWorksharing != nullptr) {
-    hasWorksharing->clauses.push_back(node);
+  auto dirWorksharing = findWorksharing();
+  if (dirWorksharing != nullptr) {
+    dirWorksharing->clauses.push_back(node);
     applied = true;
   }
 
   // [5.2:340:9]
-  auto hasTaskloop = findDirective(llvm::omp::OMPD_taskloop);
-  if (hasTaskloop != nullptr) {
-    hasTaskloop->clauses.push_back(node);
+  auto dirTaskloop = findDirective(llvm::omp::OMPD_taskloop);
+  if (dirTaskloop != nullptr) {
+    dirTaskloop->clauses.push_back(node);
     applied = true;
   }
 
   // [5.2:340:10]
-  auto hasParallel = findDirective(llvm::omp::OMPD_parallel);
-  if (hasParallel != nullptr) {
-    if (hasTaskloop == nullptr && hasWorksharing == nullptr) {
-      hasParallel->clauses.push_back(node);
+  auto dirParallel = findDirective(llvm::omp::OMPD_parallel);
+  if (dirParallel != nullptr) {
+    if (dirTaskloop == nullptr && dirWorksharing == nullptr) {
+      dirParallel->clauses.push_back(node);
       applied = true;
     } else {
       // [5.2:340:15]
       auto *shared = makeClause(
           llvm::omp::Clause::OMPC_shared,
           tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/clause.v});
-      hasParallel->clauses.push_back(shared);
+      dirParallel->clauses.push_back(shared);
     }
   }
 
@@ -598,8 +598,8 @@ bool ConstructDecompositionT<C, H>::applyClause(
     return false;
   };
 
-  auto hasTarget = findDirective(llvm::omp::OMPD_target);
-  if (hasTarget != nullptr) {
+  auto dirTarget = findDirective(llvm::omp::OMPD_target);
+  if (dirTarget != nullptr) {
     tomp::ObjectListT<IdTy, ExprTy> objects;
     llvm::copy_if(
         clause.v, std::back_inserter(objects), [&](const ObjectTy &object) {
@@ -609,14 +609,14 @@ bool ConstructDecompositionT<C, H>::applyClause(
       auto *firstp = makeClause(
           llvm::omp::Clause::OMPC_firstprivate,
           tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy>{/*List=*/objects});
-      hasTarget->clauses.push_back(firstp);
+      dirTarget->clauses.push_back(firstp);
       applied = true;
     }
   }
 
   // "task" is not handled by any of the cases above.
-  if (auto hasTask = findDirective(llvm::omp::OMPD_task)) {
-    hasTask->clauses.push_back(node);
+  if (auto dirTask = findDirective(llvm::omp::OMPD_task)) {
+    dirTask->clauses.push_back(node);
     applied = true;
   }
 
@@ -672,26 +672,26 @@ bool ConstructDecompositionT<C, H>::applyClause(
 
   if (!sharedObjects.empty()) {
     // [5.2:340:22]
-    if (auto hasParallel = findDirective(llvm::omp::OMPD_parallel)) {
+    if (auto dirParallel = findDirective(llvm::omp::OMPD_parallel)) {
       auto *shared = makeClause(
           llvm::omp::Clause::OMPC_shared,
           tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
-      hasParallel->clauses.push_back(shared);
+      dirParallel->clauses.push_back(shared);
       applied = true;
     }
 
     // [5.2:340:24]
-    if (auto hasTeams = findDirective(llvm::omp::OMPD_teams)) {
+    if (auto dirTeams = findDirective(llvm::omp::OMPD_teams)) {
       auto *shared = makeClause(
           llvm::omp::Clause::OMPC_shared,
           tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
-      hasTeams->clauses.push_back(shared);
+      dirTeams->clauses.push_back(shared);
       applied = true;
     }
   }
 
   // [5.2:340:27]
-  if (auto hasTarget = findDirective(llvm::omp::OMPD_target)) {
+  if (auto dirTarget = findDirective(llvm::omp::OMPD_target)) {
     tomp::ObjectListT<IdTy, ExprTy> tofrom;
     llvm::copy_if(
         objects, std::back_inserter(tofrom),
@@ -707,7 +707,7 @@ bool ConstructDecompositionT<C, H>::applyClause(
                           /*MapTypeModifier=*/std::nullopt,
                           /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt,
                           /*LocatorList=*/std::move(tofrom)}});
-      hasTarget->clauses.push_back(map);
+      dirTarget->clauses.push_back(map);
       applied = true;
     }
   }
@@ -847,8 +847,8 @@ bool ConstructDecompositionT<C, H>::applyClause(
   // [5.2:340:36], [5.2:341:1], [5.2:341:3]
   bool applyToParallel = true, applyToTeams = true;
 
-  auto hasParallel = findDirective(llvm::omp::Directive::OMPD_parallel);
-  if (hasParallel) {
+  auto dirParallel = findDirective(llvm::omp::Directive::OMPD_parallel);
+  if (dirParallel) {
     auto exclusions = llvm::concat<const llvm::omp::Directive>(
         getWorksharingLoop(), tomp::ListT<llvm::omp::Directive>{
                                   llvm::omp::Directive::OMPD_loop,
@@ -863,8 +863,8 @@ bool ConstructDecompositionT<C, H>::applyClause(
       applyToParallel = false;
   }
 
-  auto hasTeams = findDirective(llvm::omp::Directive::OMPD_teams);
-  if (hasTeams) {
+  auto dirTeams = findDirective(llvm::omp::Directive::OMPD_teams);
+  if (dirTeams) {
     // The only exclusion is OMPD_loop.
     if (findDirective(llvm::omp::Directive::OMPD_loop))
       applyToTeams = false;
@@ -918,9 +918,9 @@ bool ConstructDecompositionT<C, H>::applyClause(
   for (auto &leaf : llvm::reverse(leafs)) {
     if (!llvm::omp::isAllowedClauseForDirective(leaf.id, node->id, version))
       continue;
-    if (!applyToParallel && &leaf == hasParallel)
+    if (!applyToParallel && &leaf == dirParallel)
       continue;
-    if (!applyToTeams && &leaf == hasTeams)
+    if (!applyToTeams && &leaf == dirTeams)
       continue;
     // Some form of the clause will be applied past this point.
     if (isValidModifier(leaf.id, effective, effectiveApplied)) {
@@ -946,23 +946,23 @@ bool ConstructDecompositionT<C, H>::applyClause(
 
   // [5.2:341:4]
   if (!sharedObjects.empty()) {
-    if (hasParallel && !applyToParallel) {
+    if (dirParallel && !applyToParallel) {
       auto *shared = makeClause(
           llvm::omp::Clause::OMPC_shared,
           tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
-      hasParallel->clauses.push_back(shared);
+      dirParallel->clauses.push_back(shared);
     }
-    if (hasTeams && !applyToTeams) {
+    if (dirTeams && !applyToTeams) {
       auto *shared = makeClause(
           llvm::omp::Clause::OMPC_shared,
           tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
-      hasTeams->clauses.push_back(shared);
+      dirTeams->clauses.push_back(shared);
     }
   }
 
   // [5.2:341:10]
-  auto hasTarget = findDirective(llvm::omp::Directive::OMPD_target);
-  if (hasTarget && leafs.size() > 1) {
+  auto dirTarget = findDirective(llvm::omp::Directive::OMPD_target);
+  if (dirTarget && leafs.size() > 1) {
     tomp::ObjectListT<IdTy, ExprTy> tofrom;
     llvm::copy_if(objects, std::back_inserter(tofrom),
                   [&](const ObjectTy &object) {
@@ -980,7 +980,7 @@ bool ConstructDecompositionT<C, H>::applyClause(
                /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt,
                /*LocatorList=*/std::move(tofrom)}});
 
-      hasTarget->clauses.push_back(map);
+      dirTarget->clauses.push_back(map);
       applied = true;
     }
   }
@@ -1052,7 +1052,7 @@ bool ConstructDecompositionT<C, H>::applyClause(
     return false;
 
   // [5.2:341:15.2], [5.2:341:19]
-  auto hasSimd = findDirective(llvm::omp::Directive::OMPD_simd);
+  auto dirSimd = findDirective(llvm::omp::Directive::OMPD_simd);
   std::optional<ObjectTy> iterVar = helper.getLoopIterVar();
   const auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(clause.t);
 
@@ -1062,7 +1062,7 @@ bool ConstructDecompositionT<C, H>::applyClause(
 
   for (const ObjectTy &object : objects) {
     last.push_back(object);
-    if (!hasSimd || !iterVar || object.id() != iterVar->id())
+    if (!dirSimd || !iterVar || object.id() != iterVar->id())
       first.push_back(object);
   }
 

>From 682ce7f7a555d526c717c12defabd7f6c499c5bd Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 7 May 2024 08:54:02 -0500
Subject: [PATCH 12/15] Add comment to `buildConstructQueue`

---
 flang/lib/Lower/OpenMP/Decomposer.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/flang/lib/Lower/OpenMP/Decomposer.h b/flang/lib/Lower/OpenMP/Decomposer.h
index aefc860874cf40..f42d8f5c17408a 100644
--- a/flang/lib/Lower/OpenMP/Decomposer.h
+++ b/flang/lib/Lower/OpenMP/Decomposer.h
@@ -35,6 +35,11 @@ using ConstructQueue = List<UnitConstruct>;
 LLVM_DUMP_METHOD llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
                                                const UnitConstruct &uc);
 
+// Given a potentially compound construct with a list of clauses that
+// apply to it, break it up into individual sub-constructs each with
+// the subset of applicable clauses (plus implicit clauses, if any).
+// From that create a work queue where each work item corresponds to
+// the sub-construct with its clauses.
 ConstructQueue buildConstructQueue(mlir::ModuleOp modOp,
                                    semantics::SemanticsContext &semaCtx,
                                    lower::pft::Evaluation &eval,

>From 3b6542cbf6471d4dd8413977cc6e4f6496babf50 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 7 May 2024 09:15:17 -0500
Subject: [PATCH 13/15] xxx

---
 .../Frontend/OpenMP/ConstructCompositionT.h   | 23 ++++++++-----------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h
index f7123a8ec43017..925fb8956a4dd2 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h
@@ -29,12 +29,6 @@
 #include <unordered_set>
 #include <utility>
 
-namespace detail {
-template <typename T> struct trivial_hash {
-  size_t operator()(const T &) const { return 1u; }
-};
-} // namespace detail
-
 namespace tomp {
 template <typename ClauseType> struct ConstructCompositionT {
   using ClauseTy = ClauseType;
@@ -49,8 +43,9 @@ template <typename ClauseType> struct ConstructCompositionT {
   DirectiveWithClauses<ClauseTy> merged;
 
 private:
-  using ClauseSet =
-      std::unordered_set<ClauseTy, detail::trivial_hash<ClauseTy>>;
+  // Use an ordered container, since we beed to maintain the order in which
+  // clauses are added to it. This is to avoid non-deterministic output.
+  using ClauseSet = ListT<ClauseTy>;
 
   enum class Presence {
     All,  // Clause is preesnt on all leaf constructs that allow it.
@@ -119,12 +114,14 @@ ConstructCompositionT<C>::ConstructCompositionT(
   for (const auto &[index, leaf] : llvm::enumerate(leafs)) {
     for (const auto &clause : leaf.clauses) {
       // Update clausePresence.
-      auto &set = clausePresence[clause.id];
-      if (set.size() < leafs.size())
-        set.resize(leafs.size());
-      set.set(index);
+      auto &pset = clausePresence[clause.id];
+      if (pset.size() < leafs.size())
+        pset.resize(leafs.size());
+      pset.set(index);
       // Update clauseSets.
-      clauseSets[clause.id].insert(clause);
+      ClauseSet &cset = clauseSets[clause.id];
+      if (!llvm::is_contained(cset, clause))
+        cset.push_back(clause);
     }
   }
 

>From d754010f385d98f4941d807f485b638865e788ae Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 7 May 2024 10:18:35 -0500
Subject: [PATCH 14/15] Remove potential non-determinism, try to preserve
 clause/symbol order

---
 flang/test/Integration/OpenMP/copyprivate.f90 |   8 +-
 .../Lower/OpenMP/allocatable-array-bounds.f90 |  36 +++---
 flang/test/Lower/OpenMP/array-bounds.f90      |  10 +-
 flang/test/Lower/OpenMP/copyprivate.f90       |  48 +++----
 .../Lower/OpenMP/default-clause-byref.f90     |   4 +-
 flang/test/Lower/OpenMP/default-clause.f90    |   4 +-
 .../parallel-firstprivate-clause-scalar.f90   | 120 +++++++++---------
 .../OpenMP/parallel-wsloop-firstpriv.f90      |   8 +-
 .../Lower/OpenMP/wsloop-reduction-multi.f90   |  30 ++---
 .../Frontend/OpenMP/ConstructCompositionT.h   |  32 ++---
 10 files changed, 151 insertions(+), 149 deletions(-)

diff --git a/flang/test/Integration/OpenMP/copyprivate.f90 b/flang/test/Integration/OpenMP/copyprivate.f90
index ecb8a8c24648a5..d32319a18c28bb 100644
--- a/flang/test/Integration/OpenMP/copyprivate.f90
+++ b/flang/test/Integration/OpenMP/copyprivate.f90
@@ -45,16 +45,16 @@
 !CHECK:       [[OMP_REGION_END]]:
 !CHECK:         %[[THREAD_NUM2:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC:.*]])
 !CHECK:         %[[DID_IT_VAL:.*]] = load i32, ptr %[[DID_IT]]
-!CHECK:         call void @__kmpc_copyprivate(ptr @[[LOC]], i32 %[[THREAD_NUM2]], i64 0, ptr %[[J]], ptr @_copy_i32, i32 %[[DID_IT_VAL]])
+!CHECK:         call void @__kmpc_copyprivate(ptr @[[LOC]], i32 %[[THREAD_NUM2]], i64 0, ptr %[[I]], ptr @_copy_i32, i32 %[[DID_IT_VAL]])
 !CHECK:         %[[THREAD_NUM3:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC]])
 !CHECK:         %[[DID_IT_VAL2:.*]] = load i32, ptr %[[DID_IT]]
-!CHECK:         call void @__kmpc_copyprivate(ptr @[[LOC]], i32 %[[THREAD_NUM3]], i64 0, ptr %[[I]], ptr @_copy_i32, i32 %[[DID_IT_VAL2]])
+!CHECK:         call void @__kmpc_copyprivate(ptr @[[LOC]], i32 %[[THREAD_NUM3]], i64 0, ptr %[[J]], ptr @_copy_i32, i32 %[[DID_IT_VAL2]])
 
 !CHECK:       [[OMP_REGION_BODY]]:
 !CHECK:         br label %[[OMP_SINGLE_REGION:.*]]
 !CHECK:       [[OMP_SINGLE_REGION]]:
-!CHECK:         store i32 11, ptr %[[J]]
-!CHECK:         store i32 22, ptr %[[I]]
+!CHECK:         store i32 11, ptr %[[I]]
+!CHECK:         store i32 22, ptr %[[J]]
 !CHECK:         br label %[[OMP_REGION_CONT3:.*]]
 !CHECK:       [[OMP_REGION_CONT3:.*]]:
 !CHECK:         store i32 1, ptr %[[DID_IT]]
diff --git a/flang/test/Lower/OpenMP/allocatable-array-bounds.f90 b/flang/test/Lower/OpenMP/allocatable-array-bounds.f90
index 62c7f1bc98071e..aeb56a0427e3bb 100644
--- a/flang/test/Lower/OpenMP/allocatable-array-bounds.f90
+++ b/flang/test/Lower/OpenMP/allocatable-array-bounds.f90
@@ -8,24 +8,6 @@
 !HOST: %[[ALLOCA_2:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "sp_write", uniq_name = "_QFread_write_sectionEsp_write"}
 !HOST: %[[DECLARE_2:.*]]:2 = hlfir.declare %[[ALLOCA_2]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFread_write_sectionEsp_write"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
 
-!HOST: %[[LOAD_3:.*]] = fir.load %[[DECLARE_2]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-!HOST: %[[LOAD_4:.*]] = fir.load %[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-!HOST: %[[CONSTANT_5:.*]] = arith.constant 0 : index
-!HOST: %[[BOX_3:.*]]:3 = fir.box_dims %[[LOAD_4]], %[[CONSTANT_5]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-!HOST: %[[CONSTANT_6:.*]] = arith.constant 0 : index
-!HOST: %[[BOX_4:.*]]:3 = fir.box_dims %[[LOAD_3]], %[[CONSTANT_6]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-!HOST: %[[CONSTANT_7:.*]] = arith.constant 2 : index
-!HOST: %[[LB_2:.*]] = arith.subi %[[CONSTANT_7]], %[[BOX_3]]#0 : index
-!HOST: %[[CONSTANT_8:.*]] = arith.constant 5 : index
-!HOST: %[[UB_2:.*]] = arith.subi %[[CONSTANT_8]], %[[BOX_3]]#0 : index
-!HOST: %[[LOAD_5:.*]] = fir.load %[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-!HOST: %[[CONSTANT_5:.*]] = arith.constant 0 : index
-!HOST: %[[BOX_5:.*]]:3 = fir.box_dims %[[LOAD_5]], %[[CONSTANT_5]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-!HOST: %[[BOUNDS_2:.*]] = omp.map.bounds lower_bound(%[[LB_2]] : index) upper_bound(%[[UB_2]] : index) extent(%[[BOX_5]]#1 : index) stride(%[[BOX_4]]#2 : index) start_idx(%[[BOX_3]]#0 : index) {stride_in_bytes = true}
-!HOST: %[[VAR_PTR_PTR:.*]] = fir.box_offset %[[DECLARE_2]]#1 base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
-!HOST: %[[MAP_INFO_MEMBER:.*]] = omp.map.info var_ptr(%[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.array<?xi32>) var_ptr_ptr(%[[VAR_PTR_PTR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
-!HOST: %[[MAP_INFO_2:.*]] = omp.map.info var_ptr(%[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_INFO_MEMBER]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "sp_write(2:5)"}
-
 !HOST: %[[LOAD_1:.*]] = fir.load %[[DECLARE_1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
 !HOST: %[[LOAD_2:.*]] = fir.load %[[DECLARE_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
 !HOST: %[[CONSTANT_1:.*]] = arith.constant 0 : index
@@ -44,6 +26,24 @@
 !HOST: %[[MAP_INFO_MEMBER:.*]] = omp.map.info var_ptr(%[[DECLARE_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.array<?xi32>) var_ptr_ptr(%[[VAR_PTR_PTR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS_1]]) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}    
 !HOST: %[[MAP_INFO_1:.*]] = omp.map.info var_ptr(%[[DECLARE_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_INFO_MEMBER]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "sp_read(2:5)"}
 
+!HOST: %[[LOAD_3:.*]] = fir.load %[[DECLARE_2]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!HOST: %[[LOAD_4:.*]] = fir.load %[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!HOST: %[[CONSTANT_5:.*]] = arith.constant 0 : index
+!HOST: %[[BOX_3:.*]]:3 = fir.box_dims %[[LOAD_4]], %[[CONSTANT_5]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+!HOST: %[[CONSTANT_6:.*]] = arith.constant 0 : index
+!HOST: %[[BOX_4:.*]]:3 = fir.box_dims %[[LOAD_3]], %[[CONSTANT_6]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+!HOST: %[[CONSTANT_7:.*]] = arith.constant 2 : index
+!HOST: %[[LB_2:.*]] = arith.subi %[[CONSTANT_7]], %[[BOX_3]]#0 : index
+!HOST: %[[CONSTANT_8:.*]] = arith.constant 5 : index
+!HOST: %[[UB_2:.*]] = arith.subi %[[CONSTANT_8]], %[[BOX_3]]#0 : index
+!HOST: %[[LOAD_5:.*]] = fir.load %[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!HOST: %[[CONSTANT_5:.*]] = arith.constant 0 : index
+!HOST: %[[BOX_5:.*]]:3 = fir.box_dims %[[LOAD_5]], %[[CONSTANT_5]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+!HOST: %[[BOUNDS_2:.*]] = omp.map.bounds lower_bound(%[[LB_2]] : index) upper_bound(%[[UB_2]] : index) extent(%[[BOX_5]]#1 : index) stride(%[[BOX_4]]#2 : index) start_idx(%[[BOX_3]]#0 : index) {stride_in_bytes = true}
+!HOST: %[[VAR_PTR_PTR:.*]] = fir.box_offset %[[DECLARE_2]]#1 base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!HOST: %[[MAP_INFO_MEMBER:.*]] = omp.map.info var_ptr(%[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.array<?xi32>) var_ptr_ptr(%[[VAR_PTR_PTR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}    
+!HOST: %[[MAP_INFO_2:.*]] = omp.map.info var_ptr(%[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_INFO_MEMBER]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "sp_write(2:5)"}
+
 subroutine read_write_section()
     integer, allocatable :: sp_read(:)
     integer, allocatable :: sp_write(:)
diff --git a/flang/test/Lower/OpenMP/array-bounds.f90 b/flang/test/Lower/OpenMP/array-bounds.f90
index 00fa51e90832d0..2c8a8999a2cce6 100644
--- a/flang/test/Lower/OpenMP/array-bounds.f90
+++ b/flang/test/Lower/OpenMP/array-bounds.f90
@@ -14,14 +14,14 @@
 !HOST:  %[[C1:.*]] = arith.constant 1 : index
 !HOST:  %[[C2:.*]] = arith.constant 1 : index
 !HOST:  %[[C3:.*]] = arith.constant 4 : index
-!HOST:  %[[BOUNDS1:.*]] = omp.map.bounds   lower_bound(%[[C2]] : index) upper_bound(%[[C3]] : index) extent(%[[C10_0]] : index) stride(%[[C1]] : index) start_idx(%[[C1]] : index)
-!HOST:  %[[MAP1:.*]] = omp.map.info var_ptr(%[[WRITE_DECL]]#0 : !fir.ref<!fir.array<10xi32>>, !fir.array<10xi32>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_write(2:5)"}
+!HOST:  %[[BOUNDS0:.*]] = omp.map.bounds   lower_bound(%[[C2]] : index) upper_bound(%[[C3]] : index) extent(%[[C10]] : index) stride(%[[C1]] : index) start_idx(%[[C1]] : index)
+!HOST:  %[[MAP0:.*]] = omp.map.info var_ptr(%[[READ_DECL]]#0 : !fir.ref<!fir.array<10xi32>>, !fir.array<10xi32>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS0]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_read(2:5)"}
 !HOST:  %[[C4:.*]] = arith.constant 1 : index
 !HOST:  %[[C5:.*]] = arith.constant 1 : index
 !HOST:  %[[C6:.*]] = arith.constant 4 : index
-!HOST:  %[[BOUNDS0:.*]] = omp.map.bounds   lower_bound(%[[C5]] : index) upper_bound(%[[C6]] : index) extent(%[[C10]] : index) stride(%[[C4]] : index) start_idx(%[[C4]] : index)
-!HOST:  %[[MAP0:.*]] = omp.map.info var_ptr(%[[READ_DECL]]#0 : !fir.ref<!fir.array<10xi32>>, !fir.array<10xi32>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS0]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_read(2:5)"}
-!HOST:  omp.target map_entries(%[[MAP1]] -> %{{.*}}, %[[MAP0]] -> %{{.*}}, {{.*}} -> {{.*}} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>, !fir.ref<i32>) {
+!HOST:  %[[BOUNDS1:.*]] = omp.map.bounds   lower_bound(%[[C5]] : index) upper_bound(%[[C6]] : index) extent(%[[C10_0]] : index) stride(%[[C4]] : index) start_idx(%[[C4]] : index)
+!HOST:  %[[MAP1:.*]] = omp.map.info var_ptr(%[[WRITE_DECL]]#0 : !fir.ref<!fir.array<10xi32>>, !fir.array<10xi32>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_write(2:5)"}
+!HOST:  omp.target map_entries(%[[MAP0]] -> %{{.*}}, %[[MAP1]] -> %{{.*}}, {{.*}} -> {{.*}} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>, !fir.ref<i32>) {
 
 subroutine read_write_section()
     integer :: sp_read(10) = (/1,2,3,4,5,6,7,8,9,10/)
diff --git a/flang/test/Lower/OpenMP/copyprivate.f90 b/flang/test/Lower/OpenMP/copyprivate.f90
index c2d935db171ad8..9b76a996ef3e16 100644
--- a/flang/test/Lower/OpenMP/copyprivate.f90
+++ b/flang/test/Lower/OpenMP/copyprivate.f90
@@ -60,18 +60,18 @@ subroutine test_tp()
 
 !CHECK-LABEL: func @_QPtest_scalar
 !CHECK:         omp.parallel
-!CHECK:           %[[S3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs3"} : (!fir.ref<!fir.char<2,8>>, index) -> (!fir.ref<!fir.char<2,8>>, !fir.ref<!fir.char<2,8>>)
-!CHECK:           %[[S2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs2"} : (!fir.ref<!fir.char<1,8>>, index) -> (!fir.ref<!fir.char<1,8>>, !fir.ref<!fir.char<1,8>>)
-!CHECK:           %[[S1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs1"} : (!fir.ref<!fir.char<1,3>>, index) -> (!fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,3>>)
-!CHECK:           %[[L2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEl2"} : (!fir.ref<!fir.logical<8>>) -> (!fir.ref<!fir.logical<8>>, !fir.ref<!fir.logical<8>>)
-!CHECK:           %[[L1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEl1"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-!CHECK:           %[[C2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEc2"} : (!fir.ref<!fir.complex<8>>) -> (!fir.ref<!fir.complex<8>>, !fir.ref<!fir.complex<8>>)
-!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEc1"} : (!fir.ref<!fir.complex<4>>) -> (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<4>>)
-!CHECK:           %[[R2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEr2"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
-!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEr1"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi3"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
-!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi2"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
 !CHECK:           %[[I1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi2"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEi3"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEr1"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:           %[[R2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEr2"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEc1"} : (!fir.ref<!fir.complex<4>>) -> (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<4>>)
+!CHECK:           %[[C2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEc2"} : (!fir.ref<!fir.complex<8>>) -> (!fir.ref<!fir.complex<8>>, !fir.ref<!fir.complex<8>>)
+!CHECK:           %[[L1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEl1"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+!CHECK:           %[[L2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEl2"} : (!fir.ref<!fir.logical<8>>) -> (!fir.ref<!fir.logical<8>>, !fir.ref<!fir.logical<8>>)
+!CHECK:           %[[S1:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs1"} : (!fir.ref<!fir.char<1,3>>, index) -> (!fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,3>>)
+!CHECK:           %[[S2:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs2"} : (!fir.ref<!fir.char<1,8>>, index) -> (!fir.ref<!fir.char<1,8>>, !fir.ref<!fir.char<1,8>>)
+!CHECK:           %[[S3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_scalarEs3"} : (!fir.ref<!fir.char<2,8>>, index) -> (!fir.ref<!fir.char<2,8>>, !fir.ref<!fir.char<2,8>>)
 !CHECK:           omp.single copyprivate(%[[I1]]#0 -> @_copy_i32 : !fir.ref<i32>, %[[I2]]#0 -> @_copy_i64 : !fir.ref<i64>, %[[I3]]#0 -> @_copy_i64 : !fir.ref<i64>, %[[R1]]#0 -> @_copy_f32 : !fir.ref<f32>, %[[R2]]#0 -> @_copy_f64 : !fir.ref<f64>, %[[C1]]#0 -> @_copy_z32 : !fir.ref<!fir.complex<4>>, %[[C2]]#0 -> @_copy_z64 : !fir.ref<!fir.complex<8>>, %[[L1]]#0 -> @_copy_l32 : !fir.ref<!fir.logical<4>>, %[[L2]]#0 -> @_copy_l64 : !fir.ref<!fir.logical<8>>, %[[S1]]#0 -> @_copy_c8x3 : !fir.ref<!fir.char<1,3>>, %[[S2]]#0 -> @_copy_c8x8 : !fir.ref<!fir.char<1,8>>, %[[S3]]#0 -> @_copy_c16x8 : !fir.ref<!fir.char<2,8>>)
 subroutine test_scalar()
   integer(4) :: i1
@@ -94,15 +94,15 @@ subroutine test_scalar()
 
 !CHECK-LABEL: func @_QPtest_array
 !CHECK:         omp.parallel
-!CHECK:           %[[S2:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_arrayEs2"} : (!fir.ref<!fir.array<3x!fir.char<2,5>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<2,5>>>, !fir.ref<!fir.array<3x!fir.char<2,5>>>)
-!CHECK:           %[[S1:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_arrayEs1"} : (!fir.ref<!fir.array<3x!fir.char<1,8>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<1,8>>>, !fir.ref<!fir.array<3x!fir.char<1,8>>>)
-!CHECK:           %[[L1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEl1"} : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.ref<!fir.array<10x!fir.logical<4>>>)
-!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEc1"} : (!fir.ref<!fir.array<3x4x!fir.complex<4>>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4x!fir.complex<4>>>, !fir.ref<!fir.array<3x4x!fir.complex<4>>>)
-!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEr1"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
-!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi3"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
-!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi2"} : (!fir.ref<!fir.array<3x4xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4xi32>>, !fir.ref<!fir.array<3x4xi32>>)
-!CHECK:           %[[I1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi1"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
 !CHECK:           %[[A:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
+!CHECK:           %[[I1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi1"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi2"} : (!fir.ref<!fir.array<3x4xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4xi32>>, !fir.ref<!fir.array<3x4xi32>>)
+!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi3"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
+!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEr1"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
+!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEc1"} : (!fir.ref<!fir.array<3x4x!fir.complex<4>>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4x!fir.complex<4>>>, !fir.ref<!fir.array<3x4x!fir.complex<4>>>)
+!CHECK:           %[[L1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEl1"} : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.ref<!fir.array<10x!fir.logical<4>>>)
+!CHECK:           %[[S1:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_arrayEs1"} : (!fir.ref<!fir.array<3x!fir.char<1,8>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<1,8>>>, !fir.ref<!fir.array<3x!fir.char<1,8>>>)
+!CHECK:           %[[S2:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_arrayEs2"} : (!fir.ref<!fir.array<3x!fir.char<2,5>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<3x!fir.char<2,5>>>, !fir.ref<!fir.array<3x!fir.char<2,5>>>)
 !CHECK:           %[[A_REF:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
 !CHECK:           fir.store %[[A]]#0 to %[[A_REF]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
 !CHECK:           %[[I3_REF:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
@@ -142,12 +142,12 @@ subroutine test_dt()
 
 !CHECK-LABEL: func @_QPtest_attr
 !CHECK:         omp.parallel
-!CHECK:           %[[C2:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_attrEc2"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>)
-!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_attrEc1"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>)
-!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_attrEr1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
-!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_attrEi3"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
-!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_attrEi2"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
 !CHECK:           %[[I1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_attrEi1"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+!CHECK:           %[[I2:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_attrEi2"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
+!CHECK:           %[[I3:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_attrEi3"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
+!CHECK:           %[[R1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_attrEr1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
+!CHECK:           %[[C1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_attrEc1"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>)
+!CHECK:           %[[C2:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_attrEc2"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>)
 !CHECK:           omp.single copyprivate(%[[I1]]#0 -> @_copy_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %[[I2:.*]]#0 -> @_copy_box_heap_i32 : !fir.ref<!fir.box<!fir.heap<i32>>>, %[[I3]]#0 -> @_copy_box_ptr_i32 : !fir.ref<!fir.box<!fir.ptr<i32>>>, %[[R1]]#0 -> @_copy_box_ptr_Uxf32 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>,  %[[C1]]#0 -> @_copy_box_heap_Uxc8x5 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,5>>>>>, %[[C2]]#0 -> @_copy_box_ptr_Uxc8x9 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.char<1,9>>>>>)
 subroutine test_attr()
   integer, allocatable :: i1(:)
diff --git a/flang/test/Lower/OpenMP/default-clause-byref.f90 b/flang/test/Lower/OpenMP/default-clause-byref.f90
index 50bacb0d683bc5..7cc2bc2e0c710a 100644
--- a/flang/test/Lower/OpenMP/default-clause-byref.f90
+++ b/flang/test/Lower/OpenMP/default-clause-byref.f90
@@ -53,10 +53,10 @@ program default_clause_lowering
     !$omp end parallel
 
 !CHECK: omp.parallel {
-!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
-!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
 !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
 !CHECK: omp.terminator
diff --git a/flang/test/Lower/OpenMP/default-clause.f90 b/flang/test/Lower/OpenMP/default-clause.f90
index d690a49093ffc6..843ee6bb7910b3 100644
--- a/flang/test/Lower/OpenMP/default-clause.f90
+++ b/flang/test/Lower/OpenMP/default-clause.f90
@@ -53,10 +53,10 @@ program default_clause_lowering
     !$omp end parallel
 
 !CHECK: omp.parallel {
-!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
-!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
 !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
+!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
 !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
 !CHECK: omp.terminator
diff --git a/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90
index 0741d3d28feba3..6402f98a2addc4 100644
--- a/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90
+++ b/flang/test/Lower/OpenMP/parallel-firstprivate-clause-scalar.f90
@@ -7,14 +7,14 @@
 !CHECK:    %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFfirstprivate_complexEarg1"} : (!fir.ref<!fir.complex<4>>) -> (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<4>>)
 !CHECK:    %[[ARG2_DECL:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFfirstprivate_complexEarg2"} : (!fir.ref<!fir.complex<8>>) -> (!fir.ref<!fir.complex<8>>, !fir.ref<!fir.complex<8>>)
 !CHECK:   omp.parallel {
-!CHECK:     %[[ARG2_PVT:.*]] = fir.alloca !fir.complex<8> {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_complexEarg2"}
-!CHECK:     %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_complexEarg2"} : (!fir.ref<!fir.complex<8>>) -> (!fir.ref<!fir.complex<8>>, !fir.ref<!fir.complex<8>>)
-!CHECK:     %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<!fir.complex<8>>
-!CHECK:     hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : !fir.complex<8>, !fir.ref<!fir.complex<8>>
 !CHECK:     %[[ARG1_PVT:.*]] = fir.alloca !fir.complex<4> {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_complexEarg1"}
 !CHECK:     %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_complexEarg1"} : (!fir.ref<!fir.complex<4>>) -> (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<4>>)
 !CHECK:     %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<!fir.complex<4>>
 !CHECK:      hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : !fir.complex<4>, !fir.ref<!fir.complex<4>>
+!CHECK:     %[[ARG2_PVT:.*]] = fir.alloca !fir.complex<8> {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_complexEarg2"}
+!CHECK:     %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_complexEarg2"} : (!fir.ref<!fir.complex<8>>) -> (!fir.ref<!fir.complex<8>>, !fir.ref<!fir.complex<8>>)
+!CHECK:     %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<!fir.complex<8>>
+!CHECK:     hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : !fir.complex<8>, !fir.ref<!fir.complex<8>>
 !CHECK:     fir.call @_QPfoo(%[[ARG1_PVT_DECL]]#1, %[[ARG2_PVT_DECL]]#1) {{.*}}: (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<8>>) -> ()
 !CHECK:     omp.terminator
 !CHECK:   }
@@ -37,30 +37,30 @@ subroutine firstprivate_complex(arg1, arg2)
 !CHECK:  %[[ARG5_DECL:.*]]:2 = hlfir.declare %[[ARG5]] {uniq_name = "_QFfirstprivate_integerEarg5"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
 !CHECK:  %[[ARG6_DECL:.*]]:2 = hlfir.declare %[[ARG6]] {uniq_name = "_QFfirstprivate_integerEarg6"} : (!fir.ref<i128>) -> (!fir.ref<i128>, !fir.ref<i128>)
 !CHECK:  omp.parallel {
-!CHECK:    %[[ARG6_PVT:.*]] = fir.alloca i128 {bindc_name = "arg6", pinned, uniq_name = "_QFfirstprivate_integerEarg6"}
-!CHECK:    %[[ARG6_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG6_PVT]] {uniq_name = "_QFfirstprivate_integerEarg6"} : (!fir.ref<i128>) -> (!fir.ref<i128>, !fir.ref<i128>)
-!CHECK:    %[[ARG6_VAL:.*]] = fir.load %[[ARG6_DECL]]#0 : !fir.ref<i128>
-!CHECK:    hlfir.assign %[[ARG6_VAL]] to %[[ARG6_PVT_DECL]]#0 temporary_lhs : i128, !fir.ref<i128>
-!CHECK:    %[[ARG5_PVT:.*]] = fir.alloca i64 {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_integerEarg5"}
-!CHECK:    %[[ARG5_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG5_PVT]] {uniq_name = "_QFfirstprivate_integerEarg5"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
-!CHECK:    %[[ARG5_VAL:.*]] = fir.load %[[ARG5_DECL]]#0 : !fir.ref<i64>
-!CHECK:    hlfir.assign %[[ARG5_VAL]] to %[[ARG5_PVT_DECL]]#0 temporary_lhs : i64, !fir.ref<i64>
-!CHECK:    %[[ARG4_PVT:.*]] = fir.alloca i32 {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_integerEarg4"}
-!CHECK:    %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_integerEarg4"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:    %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref<i32>
-!CHECK:    hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
-!CHECK:    %[[ARG3_PVT:.*]] = fir.alloca i16 {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_integerEarg3"}
-!CHECK:    %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_integerEarg3"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>)
-!CHECK:    %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref<i16>
-!CHECK:    hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : i16, !fir.ref<i16>
-!CHECK:    %[[ARG2_PVT:.*]] = fir.alloca i8 {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_integerEarg2"}
-!CHECK:    %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_integerEarg2"} : (!fir.ref<i8>) -> (!fir.ref<i8>, !fir.ref<i8>)
-!CHECK:    %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<i8>
-!CHECK:    hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : i8, !fir.ref<i8>
 !CHECK:    %[[ARG1_PVT:.*]] = fir.alloca i32 {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_integerEarg1"}
 !CHECK:    %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_integerEarg1"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:    %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<i32>
 !CHECK:    hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK:    %[[ARG2_PVT:.*]] = fir.alloca i8 {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_integerEarg2"}
+!CHECK:    %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_integerEarg2"} : (!fir.ref<i8>) -> (!fir.ref<i8>, !fir.ref<i8>)
+!CHECK:    %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<i8>
+!CHECK:    hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : i8, !fir.ref<i8>
+!CHECK:    %[[ARG3_PVT:.*]] = fir.alloca i16 {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_integerEarg3"}
+!CHECK:    %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_integerEarg3"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>)
+!CHECK:    %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref<i16>
+!CHECK:    hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : i16, !fir.ref<i16>
+!CHECK:    %[[ARG4_PVT:.*]] = fir.alloca i32 {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_integerEarg4"}
+!CHECK:    %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_integerEarg4"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:    %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref<i32>
+!CHECK:    hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+!CHECK:    %[[ARG5_PVT:.*]] = fir.alloca i64 {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_integerEarg5"}
+!CHECK:    %[[ARG5_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG5_PVT]] {uniq_name = "_QFfirstprivate_integerEarg5"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+!CHECK:    %[[ARG5_VAL:.*]] = fir.load %[[ARG5_DECL]]#0 : !fir.ref<i64>
+!CHECK:    hlfir.assign %[[ARG5_VAL]] to %[[ARG5_PVT_DECL]]#0 temporary_lhs : i64, !fir.ref<i64>
+!CHECK:    %[[ARG6_PVT:.*]] = fir.alloca i128 {bindc_name = "arg6", pinned, uniq_name = "_QFfirstprivate_integerEarg6"}
+!CHECK:    %[[ARG6_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG6_PVT]] {uniq_name = "_QFfirstprivate_integerEarg6"} : (!fir.ref<i128>) -> (!fir.ref<i128>, !fir.ref<i128>)
+!CHECK:    %[[ARG6_VAL:.*]] = fir.load %[[ARG6_DECL]]#0 : !fir.ref<i128>
+!CHECK:    hlfir.assign %[[ARG6_VAL]] to %[[ARG6_PVT_DECL]]#0 temporary_lhs : i128, !fir.ref<i128>
 !CHECK:    fir.call @_QPbar(%[[ARG1_PVT_DECL]]#1, %[[ARG2_PVT_DECL]]#1, %[[ARG3_PVT_DECL]]#1, %[[ARG4_PVT_DECL]]#1,
 !%[[ARG5_PVT_DECL]]#1, %[[ARG6_PVT_DECL]]#1) {{.*}}: (!fir.ref<i32>, !fir.ref<i8>, !fir.ref<i16>, !fir.ref<i32>, !fir.ref<i64>, !fir.ref<i128>) -> ()
 !CHECK:    omp.terminator
@@ -87,26 +87,26 @@ subroutine firstprivate_integer(arg1, arg2, arg3, arg4, arg5, arg6)
 !CHECK:    %[[ARG4_DECL:.*]]:2 = hlfir.declare %[[ARG4]] {uniq_name = "_QFfirstprivate_logicalEarg4"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 !CHECK:    %[[ARG5_DECL:.*]]:2 = hlfir.declare %[[ARG5]] {uniq_name = "_QFfirstprivate_logicalEarg5"} : (!fir.ref<!fir.logical<8>>) -> (!fir.ref<!fir.logical<8>>, !fir.ref<!fir.logical<8>>)
 !CHECK:   omp.parallel {
-!CHECK:     %[[ARG5_PVT:.*]] = fir.alloca !fir.logical<8> {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_logicalEarg5"}
-!CHECK:     %[[ARG5_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG5_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg5"} : (!fir.ref<!fir.logical<8>>) -> (!fir.ref<!fir.logical<8>>, !fir.ref<!fir.logical<8>>)
-!CHECK:     %[[ARG5_VAL:.*]] = fir.load %[[ARG5_DECL]]#0 : !fir.ref<!fir.logical<8>>
-!CHECK:     hlfir.assign %[[ARG5_VAL]] to %[[ARG5_PVT_DECL]]#0 temporary_lhs : !fir.logical<8>, !fir.ref<!fir.logical<8>>
-!CHECK:     %[[ARG4_PVT:.*]] = fir.alloca !fir.logical<4> {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_logicalEarg4"}
-!CHECK:     %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg4"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-!CHECK:     %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref<!fir.logical<4>>
-!CHECK:     hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-!CHECK:     %[[ARG3_PVT:.*]] = fir.alloca !fir.logical<2> {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_logicalEarg3"}
-!CHECK:     %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg3"} : (!fir.ref<!fir.logical<2>>) -> (!fir.ref<!fir.logical<2>>, !fir.ref<!fir.logical<2>>)
-!CHECK:     %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref<!fir.logical<2>>
-!CHECK:     hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : !fir.logical<2>, !fir.ref<!fir.logical<2>>
-!CHECK:     %[[ARG2_PVT:.*]] = fir.alloca !fir.logical<1> {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_logicalEarg2"}
-!CHECK:     %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg2"} : (!fir.ref<!fir.logical<1>>) -> (!fir.ref<!fir.logical<1>>, !fir.ref<!fir.logical<1>>)
-!CHECK:     %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<!fir.logical<1>>
-!CHECK:     hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : !fir.logical<1>, !fir.ref<!fir.logical<1>>
 !CHECK:     %[[ARG1_PVT:.*]] = fir.alloca !fir.logical<4> {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_logicalEarg1"}
 !CHECK:     %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg1"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
 !CHECK:     %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<!fir.logical<4>>
 !CHECK:     hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+!CHECK:     %[[ARG2_PVT:.*]] = fir.alloca !fir.logical<1> {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_logicalEarg2"}
+!CHECK:     %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg2"} : (!fir.ref<!fir.logical<1>>) -> (!fir.ref<!fir.logical<1>>, !fir.ref<!fir.logical<1>>)
+!CHECK:     %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<!fir.logical<1>>
+!CHECK:     hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : !fir.logical<1>, !fir.ref<!fir.logical<1>>
+!CHECK:     %[[ARG3_PVT:.*]] = fir.alloca !fir.logical<2> {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_logicalEarg3"}
+!CHECK:     %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg3"} : (!fir.ref<!fir.logical<2>>) -> (!fir.ref<!fir.logical<2>>, !fir.ref<!fir.logical<2>>)
+!CHECK:     %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref<!fir.logical<2>>
+!CHECK:     hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : !fir.logical<2>, !fir.ref<!fir.logical<2>>
+!CHECK:     %[[ARG4_PVT:.*]] = fir.alloca !fir.logical<4> {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_logicalEarg4"}
+!CHECK:     %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg4"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+!CHECK:     %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref<!fir.logical<4>>
+!CHECK:     hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+!CHECK:     %[[ARG5_PVT:.*]] = fir.alloca !fir.logical<8> {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_logicalEarg5"}
+!CHECK:     %[[ARG5_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG5_PVT]] {uniq_name = "_QFfirstprivate_logicalEarg5"} : (!fir.ref<!fir.logical<8>>) -> (!fir.ref<!fir.logical<8>>, !fir.ref<!fir.logical<8>>)
+!CHECK:     %[[ARG5_VAL:.*]] = fir.load %[[ARG5_DECL]]#0 : !fir.ref<!fir.logical<8>>
+!CHECK:     hlfir.assign %[[ARG5_VAL]] to %[[ARG5_PVT_DECL]]#0 temporary_lhs : !fir.logical<8>, !fir.ref<!fir.logical<8>>
 !CHECK:     fir.call @_QPbaz(%[[ARG1_PVT_DECL]]#1, %[[ARG2_PVT_DECL]]#1, %[[ARG3_PVT_DECL]]#1, %[[ARG4_PVT_DECL]]#1, %[[ARG5_PVT_DECL]]#1) {{.*}}: (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<1>>, !fir.ref<!fir.logical<2>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<8>>) -> ()
 !CHECK:     omp.terminator
 !CHECK:   }
@@ -132,30 +132,30 @@ subroutine firstprivate_logical(arg1, arg2, arg3, arg4, arg5)
 !CHECK:   %[[ARG5_DECL:.*]]:2 = hlfir.declare %[[ARG5]] {uniq_name = "_QFfirstprivate_realEarg5"} : (!fir.ref<f80>) -> (!fir.ref<f80>, !fir.ref<f80>)
 !CHECK:   %[[ARG6_DECL:.*]]:2 = hlfir.declare %[[ARG6]] {uniq_name = "_QFfirstprivate_realEarg6"} : (!fir.ref<f128>) -> (!fir.ref<f128>, !fir.ref<f128>)
 !CHECK:   omp.parallel {
-!CHECK:     %[[ARG6_PVT:.*]] = fir.alloca f128 {bindc_name = "arg6", pinned, uniq_name = "_QFfirstprivate_realEarg6"}
-!CHECK:     %[[ARG6_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG6_PVT]] {uniq_name = "_QFfirstprivate_realEarg6"} : (!fir.ref<f128>) -> (!fir.ref<f128>, !fir.ref<f128>)
-!CHECK:     %[[ARG6_VAL:.*]] = fir.load %[[ARG6_DECL]]#0 : !fir.ref<f128>
-!CHECK:     hlfir.assign %[[ARG6_VAL]] to %[[ARG6_PVT_DECL]]#0 temporary_lhs : f128, !fir.ref<f128>
-!CHECK:     %[[ARG5_PVT:.*]] = fir.alloca f80 {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_realEarg5"}
-!CHECK:     %[[ARG5_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG5_PVT]] {uniq_name = "_QFfirstprivate_realEarg5"} : (!fir.ref<f80>) -> (!fir.ref<f80>, !fir.ref<f80>)
-!CHECK:     %[[ARG5_VAL:.*]] = fir.load %[[ARG5_DECL]]#0 : !fir.ref<f80>
-!CHECK:     hlfir.assign %[[ARG5_VAL]] to %[[ARG5_PVT_DECL]]#0 temporary_lhs : f80, !fir.ref<f80>
-!CHECK:     %[[ARG4_PVT:.*]] = fir.alloca f64 {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_realEarg4"}
-!CHECK:     %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_realEarg4"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
-!CHECK:     %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref<f64>
-!CHECK:     hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : f64, !fir.ref<f64>
-!CHECK:     %[[ARG3_PVT:.*]] = fir.alloca f32 {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_realEarg3"}
-!CHECK:     %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_realEarg3"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-!CHECK:     %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref<f32>
-!CHECK:     hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
-!CHECK:     %[[ARG2_PVT:.*]] = fir.alloca f16 {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_realEarg2"}
-!CHECK:     %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_realEarg2"} : (!fir.ref<f16>) -> (!fir.ref<f16>, !fir.ref<f16>)
-!CHECK:     %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<f16>
-!CHECK:     hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : f16, !fir.ref<f16>
 !CHECK:     %[[ARG1_PVT:.*]] = fir.alloca f32 {bindc_name = "arg1", pinned, uniq_name = "_QFfirstprivate_realEarg1"}
 !CHECK:     %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] {uniq_name = "_QFfirstprivate_realEarg1"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
 !CHECK:     %[[ARG1_VAL:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<f32>
 !CHECK:     hlfir.assign %[[ARG1_VAL]] to %[[ARG1_PVT_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
+!CHECK:     %[[ARG2_PVT:.*]] = fir.alloca f16 {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_realEarg2"}
+!CHECK:     %[[ARG2_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG2_PVT]] {uniq_name = "_QFfirstprivate_realEarg2"} : (!fir.ref<f16>) -> (!fir.ref<f16>, !fir.ref<f16>)
+!CHECK:     %[[ARG2_VAL:.*]] = fir.load %[[ARG2_DECL]]#0 : !fir.ref<f16>
+!CHECK:     hlfir.assign %[[ARG2_VAL]] to %[[ARG2_PVT_DECL]]#0 temporary_lhs : f16, !fir.ref<f16>
+!CHECK:     %[[ARG3_PVT:.*]] = fir.alloca f32 {bindc_name = "arg3", pinned, uniq_name = "_QFfirstprivate_realEarg3"}
+!CHECK:     %[[ARG3_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG3_PVT]] {uniq_name = "_QFfirstprivate_realEarg3"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:     %[[ARG3_VAL:.*]] = fir.load %[[ARG3_DECL]]#0 : !fir.ref<f32>
+!CHECK:     hlfir.assign %[[ARG3_VAL]] to %[[ARG3_PVT_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
+!CHECK:     %[[ARG4_PVT:.*]] = fir.alloca f64 {bindc_name = "arg4", pinned, uniq_name = "_QFfirstprivate_realEarg4"}
+!CHECK:     %[[ARG4_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG4_PVT]] {uniq_name = "_QFfirstprivate_realEarg4"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+!CHECK:     %[[ARG4_VAL:.*]] = fir.load %[[ARG4_DECL]]#0 : !fir.ref<f64>
+!CHECK:     hlfir.assign %[[ARG4_VAL]] to %[[ARG4_PVT_DECL]]#0 temporary_lhs : f64, !fir.ref<f64>
+!CHECK:     %[[ARG5_PVT:.*]] = fir.alloca f80 {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_realEarg5"}
+!CHECK:     %[[ARG5_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG5_PVT]] {uniq_name = "_QFfirstprivate_realEarg5"} : (!fir.ref<f80>) -> (!fir.ref<f80>, !fir.ref<f80>)
+!CHECK:     %[[ARG5_VAL:.*]] = fir.load %[[ARG5_DECL]]#0 : !fir.ref<f80>
+!CHECK:     hlfir.assign %[[ARG5_VAL]] to %[[ARG5_PVT_DECL]]#0 temporary_lhs : f80, !fir.ref<f80>
+!CHECK:     %[[ARG6_PVT:.*]] = fir.alloca f128 {bindc_name = "arg6", pinned, uniq_name = "_QFfirstprivate_realEarg6"}
+!CHECK:     %[[ARG6_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG6_PVT]] {uniq_name = "_QFfirstprivate_realEarg6"} : (!fir.ref<f128>) -> (!fir.ref<f128>, !fir.ref<f128>)
+!CHECK:     %[[ARG6_VAL:.*]] = fir.load %[[ARG6_DECL]]#0 : !fir.ref<f128>
+!CHECK:     hlfir.assign %[[ARG6_VAL]] to %[[ARG6_PVT_DECL]]#0 temporary_lhs : f128, !fir.ref<f128>
 !CHECK:     fir.call @_QPqux(%[[ARG1_PVT_DECL]]#1, %[[ARG2_PVT_DECL]]#1, %[[ARG3_PVT_DECL]]#1, %[[ARG4_PVT_DECL]]#1, %[[ARG5_PVT_DECL]]#1, %[[ARG6_PVT_DECL]]#1) {{.*}}: (!fir.ref<f32>, !fir.ref<f16>, !fir.ref<f32>, !fir.ref<f64>, !fir.ref<f80>, !fir.ref<f128>) -> ()
 !CHECK:     omp.terminator
 !CHECK:   }
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90
index 474fab379cd490..ac8b9f50f54e67 100644
--- a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90
+++ b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90
@@ -47,14 +47,14 @@ subroutine omp_do_firstprivate2(a, n)
   ! CHECK:  omp.parallel {
   ! CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
   ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_firstprivate2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK: %[[N_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "n", pinned, uniq_name = "_QFomp_do_firstprivate2En"}
-  ! CHECK: %[[N_PVT_DECL:.*]]:2 = hlfir.declare %[[N_PVT_REF]] {uniq_name = "_QFomp_do_firstprivate2En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  ! CHECK: %[[LD1:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<i32>
-  ! CHECK: hlfir.assign %[[LD1]] to %[[N_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
   ! CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned
   ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_firstprivate2Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! CHECK: %[[LD:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
   ! CHECK: hlfir.assign %[[LD]] to %[[A_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
+  ! CHECK: %[[N_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "n", pinned, uniq_name = "_QFomp_do_firstprivate2En"}
+  ! CHECK: %[[N_PVT_DECL:.*]]:2 = hlfir.declare %[[N_PVT_REF]] {uniq_name = "_QFomp_do_firstprivate2En"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! CHECK: %[[LD1:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref<i32>
+  ! CHECK: hlfir.assign %[[LD1]] to %[[N_PVT_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
 
 
   ! CHECK: %[[LB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90
index c2be161b83e2b2..429253efdc8090 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90
@@ -1,6 +1,17 @@
 ! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
 
+!CHECK-LABEL: omp.declare_reduction
+!CHECK-SAME: @[[MIN_RED_I32_NAME:.*]] : i32 init {
+!CHECK: ^bb0(%{{.*}}: i32):
+!CHECK:  %[[C0_1:.*]] = arith.constant 2147483647 : i32
+!CHECK:  omp.yield(%[[C0_1]] : i32)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
+!CHECK:  %[[RES:.*]] = arith.minsi %[[ARG0]], %[[ARG1]] : i32
+!CHECK:  omp.yield(%[[RES]] : i32)
+!CHECK: }
+
 !CHECK-LABEL: omp.declare_reduction
 !CHECK-SAME: @[[ADD_RED_F32_NAME:.*]] : f32 init {
 !CHECK: ^bb0(%{{.*}}: f32):
@@ -23,17 +34,6 @@
 !CHECK:  omp.yield(%[[RES]] : i32)
 !CHECK: }
 
-!CHECK-LABEL: omp.declare_reduction
-!CHECK-SAME: @[[MIN_RED_I32_NAME:.*]] : i32 init {
-!CHECK: ^bb0(%{{.*}}: i32):
-!CHECK:  %[[C0_1:.*]] = arith.constant 2147483647 : i32
-!CHECK:  omp.yield(%[[C0_1]] : i32)
-!CHECK: } combiner {
-!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
-!CHECK:  %[[RES:.*]] = arith.minsi %[[ARG0]], %[[ARG1]] : i32
-!CHECK:  omp.yield(%[[RES]] : i32)
-!CHECK: }
-
 !CHECK-LABEL: func.func @_QPmultiple_reduction
 !CHECK:      %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductionEx"}
 !CHECK:      %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFmultiple_reductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -42,13 +42,13 @@
 !CHECK:      %[[Z_REF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_reductionEz"}
 !CHECK:      %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z_REF]] {uniq_name = "_QFmultiple_reductionEz"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:      omp.wsloop reduction(
-!CHECK-SAME: @[[MIN_RED_I32_NAME]] %[[Z_DECL]]#0 -> %[[PRV_Z:[a-z0-9]+]] : !fir.ref<i32>,
-!CHECK-SAME: @[[ADD_RED_I32_NAME]] %[[X_DECL]]#0 -> %[[PRV_X:[a-z0-9]+]] : !fir.ref<i32>,
-!CHECK-SAME: @[[ADD_RED_F32_NAME]] %[[Y_DECL]]#0 -> %[[PRV_Y:[a-z0-9]+]] : !fir.ref<f32>) {
+!CHECK-SAME: @[[ADD_RED_I32_NAME]] %[[X_DECL]]#0 -> %[[PRV_X:.+]] : !fir.ref<i32>,
+!CHECK-SAME: @[[ADD_RED_F32_NAME]] %[[Y_DECL]]#0 -> %[[PRV_Y:.+]] : !fir.ref<f32>,
+!CHECK-SAME: @[[MIN_RED_I32_NAME]] %[[Z_DECL]]#0 -> %[[PRV_Z:.+]] : !fir.ref<i32>) {
 !CHECK-NEXT:   omp.loop_nest {{.*}} {
-!CHECK:          %[[PRV_Z_DECL:.+]]:2 = hlfir.declare %[[PRV_Z]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:          %[[PRV_X_DECL:.+]]:2 = hlfir.declare %[[PRV_X]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:          %[[PRV_Y_DECL:.+]]:2 = hlfir.declare %[[PRV_Y]] {{.*}} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+!CHECK:          %[[PRV_Z_DECL:.+]]:2 = hlfir.declare %[[PRV_Z]] {{.*}} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:          %[[LPRV_X:.+]] = fir.load %[[PRV_X_DECL]]#0 : !fir.ref<i32>
 !CHECK:          %[[RES_X:.+]] = arith.addi %[[LPRV_X]], %{{.+}} : i32
 !CHECK:          hlfir.assign %[[RES_X]] to %[[PRV_X_DECL]]#0 : i32, !fir.ref<i32>
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h
index 925fb8956a4dd2..7a4ed92a10703f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h
@@ -286,12 +286,6 @@ template <typename C> void ConstructCompositionT<C>::mergeReduction() {
 template <typename C> void ConstructCompositionT<C>::mergeDSA() {
   using ObjectTy = tomp::type::ObjectT<IdTy, ExprTy>;
 
-  struct Hash {
-    bool operator()(const ObjectTy &object) const {
-      return std::hash<llvm::remove_cvref_t<IdTy>>()(object.id());
-    }
-  };
-
   // Resolve data-sharing attributes.
   enum DSA : int {
     None = 0,
@@ -302,9 +296,17 @@ template <typename C> void ConstructCompositionT<C>::mergeDSA() {
     LastPrivateConditional = 1 << 4,
   };
 
-  // The operator[] will value-initialize (i.e. zero-initialize) the "int"
-  // mapped-value if the given key is not present.
-  std::unordered_map<ObjectTy, int, Hash> objectDsa;
+  // Use ordered containers to avoid non-deterministic output.
+  llvm::SmallVector<std::pair<ObjectTy, int>> objectDsa;
+
+  auto getDsa = [&](const ObjectTy &object) -> std::pair<ObjectTy, int> & {
+    auto found = llvm::find_if(objectDsa, [&](std::pair<ObjectTy, int> &p) {
+      return p.first.id() == object.id();
+    });
+    if (found != objectDsa.end())
+      return *found;
+    return objectDsa.emplace_back(object, DSA::None);
+  };
 
   using SharedTy = tomp::clause::SharedT<TypeTy, IdTy, ExprTy>;
   using PrivateTy = tomp::clause::PrivateT<TypeTy, IdTy, ExprTy>;
@@ -314,17 +316,17 @@ template <typename C> void ConstructCompositionT<C>::mergeDSA() {
   // Visit clauses that affect DSA.
   for (auto &clause : clauseSets[llvm::omp::Clause::OMPC_shared]) {
     for (auto &object : std::get<SharedTy>(clause.u).v)
-      objectDsa[object] |= DSA::Shared;
+      getDsa(object).second |= DSA::Shared;
   }
 
   for (auto &clause : clauseSets[llvm::omp::Clause::OMPC_private]) {
     for (auto &object : std::get<PrivateTy>(clause.u).v)
-      objectDsa[object] |= DSA::Private;
+      getDsa(object).second |= DSA::Private;
   }
 
   for (auto &clause : clauseSets[llvm::omp::Clause::OMPC_firstprivate]) {
     for (auto &object : std::get<FirstprivateTy>(clause.u).v)
-      objectDsa[object] |= DSA::FirstPrivate;
+      getDsa(object).second |= DSA::FirstPrivate;
   }
 
   for (auto &clause : clauseSets[llvm::omp::Clause::OMPC_lastprivate]) {
@@ -334,9 +336,9 @@ template <typename C> void ConstructCompositionT<C>::mergeDSA() {
     for (auto &object : std::get<ListTy>(lastp.t)) {
       auto &mod = std::get<std::optional<ModifierTy>>(lastp.t);
       if (mod && *mod == ModifierTy::Conditional) {
-        objectDsa[object] |= DSA::LastPrivateConditional;
+        getDsa(object).second |= DSA::LastPrivateConditional;
       } else {
-        objectDsa[object] |= DSA::LastPrivate;
+        getDsa(object).second |= DSA::LastPrivate;
       }
     }
   }
@@ -346,7 +348,7 @@ template <typename C> void ConstructCompositionT<C>::mergeDSA() {
     using ReductionTy = tomp::clause::ReductionT<TypeTy, IdTy, ExprTy>;
     using ListTy = typename ReductionTy::List;
     for (auto &object : std::get<ListTy>(std::get<ReductionTy>(clause.u).t))
-      objectDsa[object] &= ~DSA::Shared;
+      getDsa(object).second &= ~DSA::Shared;
   }
 
   tomp::ListT<ObjectTy> privateObj, sharedObj, firstpObj, lastpObj, lastpcObj;

>From 0c4e35ccdd2bd4fdc1baf33b83f73148630e0e9e Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 7 May 2024 10:25:44 -0500
Subject: [PATCH 15/15] Add tile and unroll

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 7b5d407740d7ef..2e92b636141c97 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1965,6 +1965,8 @@ static void genOMPDispatch(Fortran::lower::AbstractConverter &converter,
     break;
   case llvm::omp::Directive::OMPD_loop:
   case llvm::omp::Directive::OMPD_masked:
+  case llvm::omp::Directive::OMPD_tile:
+  case llvm::omp::Directive::OMPD_unroll:
     TODO(loc, "Unhandled loop directive (" +
                   llvm::omp::getOpenMPDirectiveName(dir) + ")");
     break;



More information about the flang-commits mailing list