[flang] [llvm] [flang][OpenMP] Decompose compound constructs, do recursive lowering (PR #90098)

Krzysztof Parzyszek via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 30 11:02:57 PDT 2024


https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/90098

>From 4cf41a60afacefbbc7098309ba1f099b867a8806 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 26 Mar 2024 09:58:18 -0500
Subject: [PATCH 1/8] [flang][OpenMP] Implement getOpenMPVersion helper
 function, NFC

---
 flang/lib/Lower/OpenMP/Utils.cpp | 6 ++++++
 flang/lib/Lower/OpenMP/Utils.h   | 1 +
 2 files changed, 7 insertions(+)

diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index da3f2be73e5095..1400f1f4cc8dd2 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -47,6 +47,12 @@ int64_t getCollapseValue(const List<Clause> &clauses) {
   return 1;
 }
 
+uint32_t getOpenMPVersion(mlir::ModuleOp mod) {
+  if (mlir::Attribute verAttr = mod->getAttr("omp.version"))
+    return llvm::cast<mlir::omp::VersionAttr>(verAttr).getVersion();
+  llvm_unreachable("Expecting OpenMP version attribute in module");
+}
+
 void genObjectList(const ObjectList &objects,
                    Fortran::lower::AbstractConverter &converter,
                    llvm::SmallVectorImpl<mlir::Value> &operands) {
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index b3a9f7f30c98bd..693a91f7cc6954 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -59,6 +59,7 @@ void gatherFuncAndVarSyms(
     llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause);
 
 int64_t getCollapseValue(const List<Clause> &clauses);
+uint32_t getOpenMPVersion(mlir::ModuleOp mod);
 
 Fortran::semantics::Symbol *
 getOmpObjectSymbol(const Fortran::parser::OmpObject &ompObject);

>From 45d5017db069a8804fbdb75da223b68cbaab11f2 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 26 Mar 2024 10:14:13 -0500
Subject: [PATCH 2/8] [flang][OpenMP] Implement getIterationVariableSymbol
 helper function, NFC

---
 flang/lib/Lower/OpenMP/Utils.cpp | 24 +++++++++++++++++++++++-
 flang/lib/Lower/OpenMP/Utils.h   |  6 ++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 1400f1f4cc8dd2..c38e0c18cac88c 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -11,10 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "Utils.h"
-#include "Clauses.h"
 
+#include "Clauses.h"
 #include <flang/Lower/AbstractConverter.h>
 #include <flang/Lower/ConvertType.h>
+#include <flang/Lower/PFTBuilder.h>
 #include <flang/Optimizer/Builder/FIRBuilder.h>
 #include <flang/Parser/parse-tree.h>
 #include <flang/Parser/tools.h>
@@ -87,6 +88,27 @@ mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter,
   return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize);
 }
 
+Fortran::semantics::Symbol *
+getIterationVariableSymbol(const Fortran::lower::pft::Evaluation &eval) {
+  return eval.visit(Fortran::common::visitors{
+      [&](const Fortran::parser::DoConstruct &doLoop) {
+        if (const auto &maybeCtrl = doLoop.GetLoopControl()) {
+          using LoopControl = Fortran::parser::LoopControl;
+          if (auto *bounds = std::get_if<LoopControl::Bounds>(&maybeCtrl->u)) {
+            static_assert(
+                std::is_same_v<decltype(bounds->name),
+                               Fortran::parser::Scalar<Fortran::parser::Name>>);
+            return bounds->name.thing.symbol;
+          }
+        }
+        return static_cast<Fortran::semantics::Symbol *>(nullptr);
+      },
+      [](auto &&) {
+        return static_cast<Fortran::semantics::Symbol *>(nullptr);
+      },
+  });
+}
+
 void gatherFuncAndVarSyms(
     const ObjectList &objects, mlir::omp::DeclareTargetCaptureClause clause,
     llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause) {
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 693a91f7cc6954..5e0ebba23bf358 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -34,6 +34,9 @@ struct OmpObjectList;
 } // namespace parser
 
 namespace lower {
+namespace pft {
+struct Evaluation;
+}
 
 class AbstractConverter;
 
@@ -54,6 +57,9 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc,
 mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter,
                           std::size_t loopVarTypeSize);
 
+Fortran::semantics::Symbol *
+getIterationVariableSymbol(const Fortran::lower::pft::Evaluation &eval);
+
 void gatherFuncAndVarSyms(
     const ObjectList &objects, mlir::omp::DeclareTargetCaptureClause clause,
     llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause);

>From 8f1ce585ea3b32551563c2949630ad4118d511e6 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Wed, 24 Apr 2024 09:51:49 -0500
Subject: [PATCH 3/8] [flang][OpenMP] Pass symTable to all genXYZ functions,
 NFC

This will unify the interface a bit more.
---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 171 ++++++++++++++++++------------
 1 file changed, 105 insertions(+), 66 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index f454f5a45a5150..47935e6cf8efcf 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -548,11 +548,12 @@ struct OpWithBodyGenInfo {
           mlir::Operation *)>;
 
   OpWithBodyGenInfo(Fortran::lower::AbstractConverter &converter,
+                    Fortran::lower::SymMap &symTable,
                     Fortran::semantics::SemanticsContext &semaCtx,
                     mlir::Location loc, Fortran::lower::pft::Evaluation &eval,
                     llvm::omp::Directive dir)
-      : converter(converter), semaCtx(semaCtx), loc(loc), eval(eval), dir(dir) {
-  }
+      : converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc),
+        eval(eval), dir(dir) {}
 
   OpWithBodyGenInfo &setGenNested(bool value) {
     genNested = value;
@@ -589,6 +590,8 @@ struct OpWithBodyGenInfo {
 
   /// [inout] converter to use for the clauses.
   Fortran::lower::AbstractConverter &converter;
+  /// [in] Symbol table
+  Fortran::lower::SymMap &symTable;
   /// [in] Semantics context
   Fortran::semantics::SemanticsContext &semaCtx;
   /// [in] location in source code.
@@ -764,6 +767,7 @@ static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) {
 
 static void genBodyOfTargetDataOp(
     Fortran::lower::AbstractConverter &converter,
+    Fortran::lower::SymMap &symTable,
     Fortran::semantics::SemanticsContext &semaCtx,
     Fortran::lower::pft::Evaluation &eval, bool genNested,
     mlir::omp::TargetDataOp &dataOp, llvm::ArrayRef<mlir::Type> useDeviceTypes,
@@ -830,6 +834,7 @@ static void genBodyOfTargetDataOp(
 // all the symbols present in mapSymbols as block arguments to this block.
 static void
 genBodyOfTargetOp(Fortran::lower::AbstractConverter &converter,
+                  Fortran::lower::SymMap &symTable,
                   Fortran::semantics::SemanticsContext &semaCtx,
                   Fortran::lower::pft::Evaluation &eval, bool genNested,
                   mlir::omp::TargetOp &targetOp,
@@ -1267,6 +1272,7 @@ static void genWsloopClauses(
 
 static mlir::omp::BarrierOp
 genBarrierOp(Fortran::lower::AbstractConverter &converter,
+             Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
              Fortran::lower::pft::Evaluation &eval, mlir::Location loc) {
   return converter.getFirOpBuilder().create<mlir::omp::BarrierOp>(loc);
@@ -1274,6 +1280,7 @@ genBarrierOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::CriticalOp
 genCriticalOp(Fortran::lower::AbstractConverter &converter,
+              Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, bool genNested,
               mlir::Location loc, const List<Clause> &clauses,
@@ -1298,7 +1305,7 @@ genCriticalOp(Fortran::lower::AbstractConverter &converter,
   }
 
   return genOpWithBody<mlir::omp::CriticalOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_critical)
           .setGenNested(genNested),
       nameAttr);
@@ -1306,6 +1313,7 @@ genCriticalOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::DistributeOp
 genDistributeOp(Fortran::lower::AbstractConverter &converter,
+                Fortran::lower::SymMap &symTable,
                 Fortran::semantics::SemanticsContext &semaCtx,
                 Fortran::lower::pft::Evaluation &eval, bool genNested,
                 mlir::Location loc, const List<Clause> &clauses) {
@@ -1315,6 +1323,7 @@ genDistributeOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::FlushOp
 genFlushOp(Fortran::lower::AbstractConverter &converter,
+           Fortran::lower::SymMap &symTable,
            Fortran::semantics::SemanticsContext &semaCtx,
            Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
            const ObjectList &objects, const List<Clause> &clauses) {
@@ -1327,17 +1336,19 @@ genFlushOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::MasterOp
 genMasterOp(Fortran::lower::AbstractConverter &converter,
+            Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
             Fortran::lower::pft::Evaluation &eval, bool genNested,
             mlir::Location loc) {
   return genOpWithBody<mlir::omp::MasterOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_master)
           .setGenNested(genNested));
 }
 
 static mlir::omp::OrderedOp
 genOrderedOp(Fortran::lower::AbstractConverter &converter,
+             Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
              Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
              const List<Clause> &clauses) {
@@ -1347,6 +1358,7 @@ genOrderedOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::OrderedRegionOp
 genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
+                   Fortran::lower::SymMap &symTable,
                    Fortran::semantics::SemanticsContext &semaCtx,
                    Fortran::lower::pft::Evaluation &eval, bool genNested,
                    mlir::Location loc, const List<Clause> &clauses) {
@@ -1354,7 +1366,7 @@ genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
   genOrderedRegionClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::OrderedRegionOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_ordered)
           .setGenNested(genNested),
       clauseOps);
@@ -1383,7 +1395,7 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
   };
 
   OpWithBodyGenInfo genInfo =
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_parallel)
           .setGenNested(genNested)
           .setOuterCombined(outerCombined)
@@ -1440,13 +1452,14 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::SectionOp
 genSectionOp(Fortran::lower::AbstractConverter &converter,
+             Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
              Fortran::lower::pft::Evaluation &eval, bool genNested,
              mlir::Location loc, const List<Clause> &clauses) {
   // Currently only private/firstprivate clause is handled, and
   // all privatization is done within `omp.section` operations.
   return genOpWithBody<mlir::omp::SectionOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_section)
           .setGenNested(genNested)
           .setClauses(&clauses));
@@ -1454,11 +1467,12 @@ genSectionOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::SectionsOp
 genSectionsOp(Fortran::lower::AbstractConverter &converter,
+              Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
               const mlir::omp::SectionsClauseOps &clauseOps) {
   return genOpWithBody<mlir::omp::SectionsOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_sections)
           .setGenNested(false),
       clauseOps);
@@ -1466,6 +1480,7 @@ genSectionsOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::SimdOp
 genSimdOp(Fortran::lower::AbstractConverter &converter,
+          Fortran::lower::SymMap &symTable,
           Fortran::semantics::SemanticsContext &semaCtx,
           Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
           const List<Clause> &clauses) {
@@ -1499,8 +1514,8 @@ genSimdOp(Fortran::lower::AbstractConverter &converter,
   };
 
   createBodyOfOp(*loopOp,
-                 OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval,
-                                   llvm::omp::Directive::OMPD_simd)
+                 OpWithBodyGenInfo(converter, symTable, semaCtx, loc,
+                                   *nestedEval, llvm::omp::Directive::OMPD_simd)
                      .setClauses(&clauses)
                      .setDataSharingProcessor(&dsp)
                      .setGenRegionEntryCb(ivCallback));
@@ -1510,6 +1525,7 @@ genSimdOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::SingleOp
 genSingleOp(Fortran::lower::AbstractConverter &converter,
+            Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
             Fortran::lower::pft::Evaluation &eval, bool genNested,
             mlir::Location loc, const List<Clause> &clauses) {
@@ -1517,7 +1533,7 @@ genSingleOp(Fortran::lower::AbstractConverter &converter,
   genSingleClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::SingleOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_single)
           .setGenNested(genNested)
           .setClauses(&clauses),
@@ -1526,6 +1542,7 @@ genSingleOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::TargetOp
 genTargetOp(Fortran::lower::AbstractConverter &converter,
+            Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
             Fortran::lower::pft::Evaluation &eval, bool genNested,
             mlir::Location loc, const List<Clause> &clauses,
@@ -1633,13 +1650,14 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
   Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap);
 
   auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
-  genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSyms,
-                    mapLocs, mapTypes, loc);
+  genBodyOfTargetOp(converter, symTable, semaCtx, eval, genNested, targetOp,
+                    mapSyms, mapLocs, mapTypes, loc);
   return targetOp;
 }
 
 static mlir::omp::TargetDataOp
 genTargetDataOp(Fortran::lower::AbstractConverter &converter,
+                Fortran::lower::SymMap &symTable,
                 Fortran::semantics::SemanticsContext &semaCtx,
                 Fortran::lower::pft::Evaluation &eval, bool genNested,
                 mlir::Location loc, const List<Clause> &clauses) {
@@ -1654,14 +1672,16 @@ genTargetDataOp(Fortran::lower::AbstractConverter &converter,
   auto targetDataOp =
       converter.getFirOpBuilder().create<mlir::omp::TargetDataOp>(loc,
                                                                   clauseOps);
-  genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, targetDataOp,
-                        useDeviceTypes, useDeviceLocs, useDeviceSyms, loc);
+  genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, genNested,
+                        targetDataOp, useDeviceTypes, useDeviceLocs,
+                        useDeviceSyms, loc);
   return targetDataOp;
 }
 
 template <typename OpTy>
 static OpTy
 genTargetEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter,
+                               Fortran::lower::SymMap &symTable,
                                Fortran::semantics::SemanticsContext &semaCtx,
                                mlir::Location loc,
                                const List<Clause> &clauses) {
@@ -1689,6 +1709,7 @@ genTargetEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::TaskOp
 genTaskOp(Fortran::lower::AbstractConverter &converter,
+          Fortran::lower::SymMap &symTable,
           Fortran::semantics::SemanticsContext &semaCtx,
           Fortran::lower::pft::Evaluation &eval, bool genNested,
           mlir::Location loc, const List<Clause> &clauses) {
@@ -1697,7 +1718,7 @@ genTaskOp(Fortran::lower::AbstractConverter &converter,
   genTaskClauses(converter, semaCtx, stmtCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::TaskOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_task)
           .setGenNested(genNested)
           .setClauses(&clauses),
@@ -1706,6 +1727,7 @@ genTaskOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::TaskgroupOp
 genTaskgroupOp(Fortran::lower::AbstractConverter &converter,
+               Fortran::lower::SymMap &symTable,
                Fortran::semantics::SemanticsContext &semaCtx,
                Fortran::lower::pft::Evaluation &eval, bool genNested,
                mlir::Location loc, const List<Clause> &clauses) {
@@ -1713,7 +1735,7 @@ genTaskgroupOp(Fortran::lower::AbstractConverter &converter,
   genTaskgroupClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::TaskgroupOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_taskgroup)
           .setGenNested(genNested)
           .setClauses(&clauses),
@@ -1722,6 +1744,7 @@ genTaskgroupOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::TaskloopOp
 genTaskloopOp(Fortran::lower::AbstractConverter &converter,
+              Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
               const List<Clause> &clauses) {
@@ -1730,6 +1753,7 @@ genTaskloopOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::TaskwaitOp
 genTaskwaitOp(Fortran::lower::AbstractConverter &converter,
+              Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
               const List<Clause> &clauses) {
@@ -1741,6 +1765,7 @@ genTaskwaitOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::TaskyieldOp
 genTaskyieldOp(Fortran::lower::AbstractConverter &converter,
+               Fortran::lower::SymMap &symTable,
                Fortran::semantics::SemanticsContext &semaCtx,
                Fortran::lower::pft::Evaluation &eval, mlir::Location loc) {
   return converter.getFirOpBuilder().create<mlir::omp::TaskyieldOp>(loc);
@@ -1748,6 +1773,7 @@ genTaskyieldOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::TeamsOp
 genTeamsOp(Fortran::lower::AbstractConverter &converter,
+           Fortran::lower::SymMap &symTable,
            Fortran::semantics::SemanticsContext &semaCtx,
            Fortran::lower::pft::Evaluation &eval, bool genNested,
            mlir::Location loc, const List<Clause> &clauses,
@@ -1757,7 +1783,7 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter,
   genTeamsClauses(converter, semaCtx, stmtCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::TeamsOp>(
-      OpWithBodyGenInfo(converter, semaCtx, loc, eval,
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_teams)
           .setGenNested(genNested)
           .setOuterCombined(outerCombined)
@@ -1767,6 +1793,7 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter,
 
 static mlir::omp::WsloopOp
 genWsloopOp(Fortran::lower::AbstractConverter &converter,
+            Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
             Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
             const List<Clause> &clauses) {
@@ -1805,8 +1832,8 @@ genWsloopOp(Fortran::lower::AbstractConverter &converter,
   };
 
   createBodyOfOp(*loopOp,
-                 OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval,
-                                   llvm::omp::Directive::OMPD_do)
+                 OpWithBodyGenInfo(converter, symTable, semaCtx, loc,
+                                   *nestedEval, llvm::omp::Directive::OMPD_do)
                      .setClauses(&clauses)
                      .setDataSharingProcessor(&dsp)
                      .setReductions(&reductionSyms, &reductionTypes)
@@ -1820,6 +1847,7 @@ genWsloopOp(Fortran::lower::AbstractConverter &converter,
 
 static void
 genCompositeDistributeParallelDo(Fortran::lower::AbstractConverter &converter,
+                                 Fortran::lower::SymMap &symTable,
                                  Fortran::semantics::SemanticsContext &semaCtx,
                                  Fortran::lower::pft::Evaluation &eval,
                                  const List<Clause> &clauses,
@@ -1829,6 +1857,7 @@ genCompositeDistributeParallelDo(Fortran::lower::AbstractConverter &converter,
 
 static void genCompositeDistributeParallelDoSimd(
     Fortran::lower::AbstractConverter &converter,
+    Fortran::lower::SymMap &symTable,
     Fortran::semantics::SemanticsContext &semaCtx,
     Fortran::lower::pft::Evaluation &eval, const List<Clause> &clauses,
     mlir::Location loc) {
@@ -1837,6 +1866,7 @@ static void genCompositeDistributeParallelDoSimd(
 
 static void
 genCompositeDistributeSimd(Fortran::lower::AbstractConverter &converter,
+                           Fortran::lower::SymMap &symTable,
                            Fortran::semantics::SemanticsContext &semaCtx,
                            Fortran::lower::pft::Evaluation &eval,
                            const List<Clause> &clauses, mlir::Location loc) {
@@ -1844,6 +1874,7 @@ genCompositeDistributeSimd(Fortran::lower::AbstractConverter &converter,
 }
 
 static void genCompositeDoSimd(Fortran::lower::AbstractConverter &converter,
+                               Fortran::lower::SymMap &symTable,
                                Fortran::semantics::SemanticsContext &semaCtx,
                                Fortran::lower::pft::Evaluation &eval,
                                const List<Clause> &clauses,
@@ -1860,11 +1891,12 @@ static void genCompositeDoSimd(Fortran::lower::AbstractConverter &converter,
   // When support for vectorization is enabled, then we need to add handling of
   // if clause. Currently if clause can be skipped because we always assume
   // SIMD length = 1.
-  genWsloopOp(converter, semaCtx, eval, loc, clauses);
+  genWsloopOp(converter, symTable, semaCtx, eval, loc, clauses);
 }
 
 static void
 genCompositeTaskloopSimd(Fortran::lower::AbstractConverter &converter,
+                         Fortran::lower::SymMap &symTable,
                          Fortran::semantics::SemanticsContext &semaCtx,
                          Fortran::lower::pft::Evaluation &eval,
                          const List<Clause> &clauses, mlir::Location loc) {
@@ -1985,32 +2017,32 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
   default:
     break;
   case llvm::omp::Directive::OMPD_barrier:
-    genBarrierOp(converter, semaCtx, eval, currentLocation);
+    genBarrierOp(converter, symTable, semaCtx, eval, currentLocation);
     break;
   case llvm::omp::Directive::OMPD_taskwait:
-    genTaskwaitOp(converter, semaCtx, eval, currentLocation, clauses);
+    genTaskwaitOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
     break;
   case llvm::omp::Directive::OMPD_taskyield:
-    genTaskyieldOp(converter, semaCtx, eval, currentLocation);
+    genTaskyieldOp(converter, symTable, semaCtx, eval, currentLocation);
     break;
   case llvm::omp::Directive::OMPD_target_data:
-    genTargetDataOp(converter, semaCtx, eval, /*genNested=*/true,
+    genTargetDataOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
                     currentLocation, clauses);
     break;
   case llvm::omp::Directive::OMPD_target_enter_data:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>(
-        converter, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses);
     break;
   case llvm::omp::Directive::OMPD_target_exit_data:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>(
-        converter, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses);
     break;
   case llvm::omp::Directive::OMPD_target_update:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>(
-        converter, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses);
     break;
   case llvm::omp::Directive::OMPD_ordered:
-    genOrderedOp(converter, semaCtx, eval, currentLocation, clauses);
+    genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
     break;
   }
 }
@@ -2034,7 +2066,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
                             [&](auto &&s) { return makeClause(s.v, semaCtx); })
                  : List<Clause>{};
   mlir::Location currentLocation = converter.genLocation(verbatim.source);
-  genFlushOp(converter, semaCtx, eval, currentLocation, objects, clauses);
+  genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects,
+             clauses);
 }
 
 static void
@@ -2187,12 +2220,13 @@ genOMP(Fortran::lower::AbstractConverter &converter,
     switch (leafDir) {
     case llvm::omp::Directive::OMPD_master:
       // 2.16 MASTER construct.
-      genMasterOp(converter, semaCtx, eval, genNested, currentLocation);
+      genMasterOp(converter, symTable, semaCtx, eval, genNested,
+                  currentLocation);
       break;
     case llvm::omp::Directive::OMPD_ordered:
       // 2.17.9 ORDERED construct.
-      genOrderedRegionOp(converter, semaCtx, eval, genNested, currentLocation,
-                         clauses);
+      genOrderedRegionOp(converter, symTable, semaCtx, eval, genNested,
+                         currentLocation, clauses);
       break;
     case llvm::omp::Directive::OMPD_parallel:
       // 2.6 PARALLEL construct.
@@ -2201,41 +2235,43 @@ genOMP(Fortran::lower::AbstractConverter &converter,
       break;
     case llvm::omp::Directive::OMPD_single:
       // 2.8.2 SINGLE construct.
-      genSingleOp(converter, semaCtx, eval, genNested, currentLocation,
-                  clauses);
+      genSingleOp(converter, symTable, semaCtx, eval, genNested,
+                  currentLocation, clauses);
       break;
     case llvm::omp::Directive::OMPD_target:
       // 2.12.5 TARGET construct.
-      genTargetOp(converter, semaCtx, eval, genNested, currentLocation, clauses,
-                  outerCombined);
+      genTargetOp(converter, symTable, semaCtx, eval, genNested,
+                  currentLocation, clauses, outerCombined);
       break;
     case llvm::omp::Directive::OMPD_target_data:
       // 2.12.2 TARGET DATA construct.
-      genTargetDataOp(converter, semaCtx, eval, genNested, currentLocation,
-                      clauses);
+      genTargetDataOp(converter, symTable, semaCtx, eval, genNested,
+                      currentLocation, clauses);
       break;
     case llvm::omp::Directive::OMPD_task:
       // 2.10.1 TASK construct.
-      genTaskOp(converter, semaCtx, eval, genNested, currentLocation, clauses);
+      genTaskOp(converter, symTable, semaCtx, eval, genNested, currentLocation,
+                clauses);
       break;
     case llvm::omp::Directive::OMPD_taskgroup:
       // 2.17.6 TASKGROUP construct.
-      genTaskgroupOp(converter, semaCtx, eval, genNested, currentLocation,
-                     clauses);
+      genTaskgroupOp(converter, symTable, semaCtx, eval, genNested,
+                     currentLocation, clauses);
       break;
     case llvm::omp::Directive::OMPD_teams:
       // 2.7 TEAMS construct.
       // FIXME Pass the outerCombined argument or rename it to better describe
       // what it represents if it must always be `false` in this context.
-      genTeamsOp(converter, semaCtx, eval, genNested, currentLocation, clauses);
+      genTeamsOp(converter, symTable, semaCtx, eval, genNested, currentLocation,
+                 clauses);
       break;
     case llvm::omp::Directive::OMPD_workshare:
       // 2.8.3 WORKSHARE construct.
       // FIXME: Workshare is not a commonly used OpenMP construct, an
       // implementation for this feature will come later. For the codes
       // that use this construct, add a single construct for now.
-      genSingleOp(converter, semaCtx, eval, genNested, currentLocation,
-                  clauses);
+      genSingleOp(converter, symTable, semaCtx, eval, genNested,
+                  currentLocation, clauses);
       break;
     default:
       llvm_unreachable("Unexpected block construct");
@@ -2257,8 +2293,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
       makeClauses(std::get<Fortran::parser::OmpClauseList>(cd.t), semaCtx);
   const auto &name = std::get<std::optional<Fortran::parser::Name>>(cd.t);
   mlir::Location currentLocation = converter.getCurrentLocation();
-  genCriticalOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation,
-                clauses, name);
+  genCriticalOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
+                currentLocation, clauses, name);
 }
 
 static void
@@ -2304,26 +2340,27 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
       switch (leafDir) {
       case llvm::omp::Directive::OMPD_distribute_parallel_do:
         // 2.9.4.3 DISTRIBUTE PARALLEL Worksharing-Loop construct.
-        genCompositeDistributeParallelDo(converter, semaCtx, eval, clauses,
-                                         currentLocation);
+        genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval,
+                                         clauses, currentLocation);
         break;
       case llvm::omp::Directive::OMPD_distribute_parallel_do_simd:
         // 2.9.4.4 DISTRIBUTE PARALLEL Worksharing-Loop SIMD construct.
-        genCompositeDistributeParallelDoSimd(converter, semaCtx, eval, clauses,
-                                             currentLocation);
+        genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval,
+                                             clauses, currentLocation);
         break;
       case llvm::omp::Directive::OMPD_distribute_simd:
         // 2.9.4.2 DISTRIBUTE SIMD construct.
-        genCompositeDistributeSimd(converter, semaCtx, eval, clauses,
+        genCompositeDistributeSimd(converter, symTable, semaCtx, eval, clauses,
                                    currentLocation);
         break;
       case llvm::omp::Directive::OMPD_do_simd:
         // 2.9.3.2 Worksharing-Loop SIMD construct.
-        genCompositeDoSimd(converter, semaCtx, eval, clauses, currentLocation);
+        genCompositeDoSimd(converter, symTable, semaCtx, eval, clauses,
+                           currentLocation);
         break;
       case llvm::omp::Directive::OMPD_taskloop_simd:
         // 2.10.3 TASKLOOP SIMD construct.
-        genCompositeTaskloopSimd(converter, semaCtx, eval, clauses,
+        genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, clauses,
                                  currentLocation);
         break;
       default:
@@ -2334,12 +2371,13 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
       switch (leafDir) {
       case llvm::omp::Directive::OMPD_distribute:
         // 2.9.4.1 DISTRIBUTE construct.
-        genDistributeOp(converter, semaCtx, eval, genNested, currentLocation,
-                        clauses);
+        genDistributeOp(converter, symTable, semaCtx, eval, genNested,
+                        currentLocation, clauses);
         break;
       case llvm::omp::Directive::OMPD_do:
         // 2.9.2 Worksharing-Loop construct.
-        genWsloopOp(converter, semaCtx, eval, currentLocation, clauses);
+        genWsloopOp(converter, symTable, semaCtx, eval, currentLocation,
+                    clauses);
         break;
       case llvm::omp::Directive::OMPD_parallel:
         // 2.6 PARALLEL construct.
@@ -2353,16 +2391,17 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
         break;
       case llvm::omp::Directive::OMPD_simd:
         // 2.9.3.1 SIMD construct.
-        genSimdOp(converter, semaCtx, eval, currentLocation, clauses);
+        genSimdOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
         break;
       case llvm::omp::Directive::OMPD_target:
         // 2.12.5 TARGET construct.
-        genTargetOp(converter, semaCtx, eval, genNested, currentLocation,
-                    clauses, /*outerCombined=*/true);
+        genTargetOp(converter, symTable, semaCtx, eval, genNested,
+                    currentLocation, clauses, /*outerCombined=*/true);
         break;
       case llvm::omp::Directive::OMPD_taskloop:
         // 2.10.2 TASKLOOP construct.
-        genTaskloopOp(converter, semaCtx, eval, currentLocation, clauses);
+        genTaskloopOp(converter, symTable, semaCtx, eval, currentLocation,
+                      clauses);
         break;
       case llvm::omp::Directive::OMPD_teams:
         // 2.7 TEAMS construct.
@@ -2370,8 +2409,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
         // combined construct in this constext (e.g. target teams distribute).
         // Maybe rename the argument if it represents something else or
         // initialize it properly.
-        genTeamsOp(converter, semaCtx, eval, genNested, currentLocation,
-                   clauses, /*outerCombined=*/true);
+        genTeamsOp(converter, symTable, semaCtx, eval, genNested,
+                   currentLocation, clauses, /*outerCombined=*/true);
         break;
       case llvm::omp::Directive::OMPD_loop:
       case llvm::omp::Directive::OMPD_masked:
@@ -2433,7 +2472,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   }
 
   // SECTIONS construct.
-  genSectionsOp(converter, semaCtx, eval, currentLocation, clauseOps);
+  genSectionsOp(converter, symTable, semaCtx, eval, currentLocation, clauseOps);
 
   // Generate nested SECTION operations recursively.
   const auto &sectionBlocks =
@@ -2443,8 +2482,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   for (const auto &[nblock, neval] :
        llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) {
     symTable.pushScope();
-    genSectionOp(converter, semaCtx, neval, /*genNested=*/true, currentLocation,
-                 clauses);
+    genSectionOp(converter, symTable, semaCtx, neval, /*genNested=*/true,
+                 currentLocation, clauses);
     symTable.popScope();
     firOpBuilder.restoreInsertionPoint(ip);
   }

>From 6f7697e46ace92707bc4cf648fab25a72c0639a1 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Thu, 11 Apr 2024 10:35:02 -0500
Subject: [PATCH 4/8] [flang][OpenMP] Decompose compound construccts, do
 recursive lowering

A compound construct with a list of clauses is broken up into
individual leaf/composite constructs. Each such construct has
the list of clauses that apply to it based on the OpenMP spec.

Each lowering function (i.e. a function that generates MLIR ops)
is now responsible for generating its body as described below.

Functions that receive AST nodes extract the construct, and the
clauses from the node. They then create a work queue consisting
of individual constructs, and invoke a common dispatch function.

The dispatch function examines the current position in the queue,
and invokes the appropriate lowering function. Each lowering
function receives the queue as well, and once it needs to generate
its body, it either invokes the dispatch function on the rest of
the queue (if any), or processes nested evaluations if the work
queue is at the end.
---
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 784 +++++++-------
 .../Frontend/OpenMP/ConstructDecompositionT.h | 985 ++++++++++++++++++
 2 files changed, 1376 insertions(+), 393 deletions(-)
 create mode 100644 llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 47935e6cf8efcf..4b8afd42f639d5 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -36,6 +36,7 @@
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Frontend/OpenMP/ConstructDecompositionT.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 
 using namespace Fortran::lower::omp;
@@ -72,6 +73,89 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
     converter.genEval(e);
 }
 
+//===----------------------------------------------------------------------===//
+// Directive decomposition
+//===----------------------------------------------------------------------===//
+
+namespace {
+using DirectiveWithClauses = tomp::DirectiveWithClauses<lower::omp::Clause>;
+using ConstructQueue = List<DirectiveWithClauses>;
+} // namespace
+
+static void genOMPDispatch(Fortran::lower::AbstractConverter &converter,
+                           Fortran::lower::SymMap &symTable,
+                           Fortran::semantics::SemanticsContext &semaCtx,
+                           Fortran::lower::pft::Evaluation &eval,
+                           mlir::Location loc, const ConstructQueue &queue,
+                           ConstructQueue::iterator item);
+
+namespace {
+struct ConstructDecomposition {
+  ConstructDecomposition(mlir::ModuleOp modOp,
+                         semantics::SemanticsContext &semaCtx,
+                         lower::pft::Evaluation &ev,
+                         llvm::omp::Directive construct,
+                         const List<Clause> &clauses)
+      : semaCtx(semaCtx), mod(modOp), eval(ev) {
+    tomp::ConstructDecompositionT decompose(getOpenMPVersion(modOp), *this,
+                                            construct, llvm::ArrayRef(clauses));
+    output = std::move(decompose.output);
+  }
+
+  // Given an object, return its base object if one exists.
+  std::optional<Object> getBaseObject(const Object &object) {
+    return lower::omp::getBaseObject(object, semaCtx);
+  }
+
+  // Return the iteration variable of the associated loop if any.
+  std::optional<Object> getLoopIterVar() {
+    if (semantics::Symbol *symbol = getIterationVariableSymbol(eval))
+      return Object{symbol, /*designator=*/{}};
+    return std::nullopt;
+  }
+
+  semantics::SemanticsContext &semaCtx;
+  mlir::ModuleOp mod;
+  lower::pft::Evaluation &eval;
+  List<DirectiveWithClauses> output;
+};
+} // namespace
+
+LLVM_DUMP_METHOD static llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os, const DirectiveWithClauses &dwc) {
+  os << llvm::omp::getOpenMPDirectiveName(dwc.id);
+  for (auto [index, clause] : llvm::enumerate(dwc.clauses)) {
+    os << (index == 0 ? '\t' : ' ');
+    os << llvm::omp::getOpenMPClauseName(clause.id);
+  }
+  return os;
+}
+
+static void splitCompoundConstruct(
+    mlir::ModuleOp modOp, Fortran::semantics::SemanticsContext &semaCtx,
+    Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive construct,
+    const List<Clause> &clauses, List<DirectiveWithClauses> &directives) {
+
+  ConstructDecomposition decompose(modOp, semaCtx, eval, construct, clauses);
+  assert(!decompose.output.empty());
+
+  llvm::SmallVector<llvm::omp::Directive> loweringUnits;
+  std::ignore =
+      llvm::omp::getLeafOrCompositeConstructs(construct, loweringUnits);
+
+  int leafIndex = 0;
+  for (llvm::omp::Directive dir_id : loweringUnits) {
+    directives.push_back(DirectiveWithClauses{dir_id});
+    DirectiveWithClauses &dwc = directives.back();
+    llvm::ArrayRef<llvm::omp::Directive> leafsOrSelf =
+        llvm::omp::getLeafConstructsOrSelf(dir_id);
+    for (int i = 0, e = leafsOrSelf.size(); i != e; ++i) {
+      dwc.clauses.append(decompose.output[leafIndex].clauses);
+      ++leafIndex;
+    }
+  }
+}
+
 static fir::GlobalOp globalInitialization(
     Fortran::lower::AbstractConverter &converter,
     fir::FirOpBuilder &firOpBuilder, const Fortran::semantics::Symbol &sym,
@@ -460,81 +544,6 @@ markDeclareTarget(mlir::Operation *op,
   declareTargetOp.setDeclareTarget(deviceType, captureClause);
 }
 
-/// Split a combined directive into an outer leaf directive and the (possibly
-/// combined) rest of the combined directive. Composite directives and
-/// non-compound directives are not split, in which case it will return the
-/// input directive as its first output and an empty value as its second output.
-static std::pair<llvm::omp::Directive, std::optional<llvm::omp::Directive>>
-splitCombinedDirective(llvm::omp::Directive dir) {
-  using D = llvm::omp::Directive;
-  switch (dir) {
-  case D::OMPD_masked_taskloop:
-    return {D::OMPD_masked, D::OMPD_taskloop};
-  case D::OMPD_masked_taskloop_simd:
-    return {D::OMPD_masked, D::OMPD_taskloop_simd};
-  case D::OMPD_master_taskloop:
-    return {D::OMPD_master, D::OMPD_taskloop};
-  case D::OMPD_master_taskloop_simd:
-    return {D::OMPD_master, D::OMPD_taskloop_simd};
-  case D::OMPD_parallel_do:
-    return {D::OMPD_parallel, D::OMPD_do};
-  case D::OMPD_parallel_do_simd:
-    return {D::OMPD_parallel, D::OMPD_do_simd};
-  case D::OMPD_parallel_masked:
-    return {D::OMPD_parallel, D::OMPD_masked};
-  case D::OMPD_parallel_masked_taskloop:
-    return {D::OMPD_parallel, D::OMPD_masked_taskloop};
-  case D::OMPD_parallel_masked_taskloop_simd:
-    return {D::OMPD_parallel, D::OMPD_masked_taskloop_simd};
-  case D::OMPD_parallel_master:
-    return {D::OMPD_parallel, D::OMPD_master};
-  case D::OMPD_parallel_master_taskloop:
-    return {D::OMPD_parallel, D::OMPD_master_taskloop};
-  case D::OMPD_parallel_master_taskloop_simd:
-    return {D::OMPD_parallel, D::OMPD_master_taskloop_simd};
-  case D::OMPD_parallel_sections:
-    return {D::OMPD_parallel, D::OMPD_sections};
-  case D::OMPD_parallel_workshare:
-    return {D::OMPD_parallel, D::OMPD_workshare};
-  case D::OMPD_target_parallel:
-    return {D::OMPD_target, D::OMPD_parallel};
-  case D::OMPD_target_parallel_do:
-    return {D::OMPD_target, D::OMPD_parallel_do};
-  case D::OMPD_target_parallel_do_simd:
-    return {D::OMPD_target, D::OMPD_parallel_do_simd};
-  case D::OMPD_target_simd:
-    return {D::OMPD_target, D::OMPD_simd};
-  case D::OMPD_target_teams:
-    return {D::OMPD_target, D::OMPD_teams};
-  case D::OMPD_target_teams_distribute:
-    return {D::OMPD_target, D::OMPD_teams_distribute};
-  case D::OMPD_target_teams_distribute_parallel_do:
-    return {D::OMPD_target, D::OMPD_teams_distribute_parallel_do};
-  case D::OMPD_target_teams_distribute_parallel_do_simd:
-    return {D::OMPD_target, D::OMPD_teams_distribute_parallel_do_simd};
-  case D::OMPD_target_teams_distribute_simd:
-    return {D::OMPD_target, D::OMPD_teams_distribute_simd};
-  case D::OMPD_teams_distribute:
-    return {D::OMPD_teams, D::OMPD_distribute};
-  case D::OMPD_teams_distribute_parallel_do:
-    return {D::OMPD_teams, D::OMPD_distribute_parallel_do};
-  case D::OMPD_teams_distribute_parallel_do_simd:
-    return {D::OMPD_teams, D::OMPD_distribute_parallel_do_simd};
-  case D::OMPD_teams_distribute_simd:
-    return {D::OMPD_teams, D::OMPD_distribute_simd};
-  case D::OMPD_parallel_loop:
-    return {D::OMPD_parallel, D::OMPD_loop};
-  case D::OMPD_target_parallel_loop:
-    return {D::OMPD_target, D::OMPD_parallel_loop};
-  case D::OMPD_target_teams_loop:
-    return {D::OMPD_target, D::OMPD_teams_loop};
-  case D::OMPD_teams_loop:
-    return {D::OMPD_teams, D::OMPD_loop};
-  default:
-    return {dir, std::nullopt};
-  }
-}
-
 //===----------------------------------------------------------------------===//
 // Op body generation helper structures and functions
 //===----------------------------------------------------------------------===//
@@ -555,11 +564,6 @@ struct OpWithBodyGenInfo {
       : converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc),
         eval(eval), dir(dir) {}
 
-  OpWithBodyGenInfo &setGenNested(bool value) {
-    genNested = value;
-    return *this;
-  }
-
   OpWithBodyGenInfo &setOuterCombined(bool value) {
     outerCombined = value;
     return *this;
@@ -600,8 +604,6 @@ struct OpWithBodyGenInfo {
   Fortran::lower::pft::Evaluation &eval;
   /// [in] leaf directive for which to generate the op body.
   llvm::omp::Directive dir;
-  /// [in] whether to generate FIR for nested evaluations
-  bool genNested = true;
   /// [in] is this an outer operation - prevents privatization.
   bool outerCombined = false;
   /// [in] list of clauses to process.
@@ -622,7 +624,9 @@ struct OpWithBodyGenInfo {
 ///
 /// \param [in]   op - the operation the body belongs to.
 /// \param [in] info - options controlling code-gen for the construction.
-static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) {
+static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
+                           const ConstructQueue &queue,
+                           ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder();
 
   auto insertMarker = [](fir::FirOpBuilder &builder) {
@@ -678,7 +682,10 @@ static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) {
     }
   }
 
-  if (info.genNested) {
+  if (ConstructQueue::iterator next = std::next(item); next != queue.end()) {
+    genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval,
+                   info.loc, queue, next);
+  } else {
     // genFIR(Evaluation&) tries to patch up unterminated blocks, causing
     // a lot of complications for our approach if the terminator generation
     // is delayed past this point. Insert a temporary terminator here, then
@@ -769,11 +776,12 @@ static void genBodyOfTargetDataOp(
     Fortran::lower::AbstractConverter &converter,
     Fortran::lower::SymMap &symTable,
     Fortran::semantics::SemanticsContext &semaCtx,
-    Fortran::lower::pft::Evaluation &eval, bool genNested,
-    mlir::omp::TargetDataOp &dataOp, llvm::ArrayRef<mlir::Type> useDeviceTypes,
+    Fortran::lower::pft::Evaluation &eval, mlir::omp::TargetDataOp &dataOp,
+    llvm::ArrayRef<mlir::Type> useDeviceTypes,
     llvm::ArrayRef<mlir::Location> useDeviceLocs,
     llvm::ArrayRef<const Fortran::semantics::Symbol *> useDeviceSymbols,
-    const mlir::Location &currentLocation) {
+    const mlir::Location &currentLocation, const ConstructQueue &queue,
+    ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   mlir::Region &region = dataOp.getRegion();
 
@@ -826,8 +834,13 @@ static void genBodyOfTargetDataOp(
 
   // Set the insertion point after the marker.
   firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
-  if (genNested)
+
+  if (ConstructQueue::iterator next = std::next(item); next != queue.end()) {
+    genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                   next);
+  } else {
     genNestedEvaluations(converter, eval);
+  }
 }
 
 // This functions creates a block for the body of the targetOp's region. It adds
@@ -836,12 +849,13 @@ static void
 genBodyOfTargetOp(Fortran::lower::AbstractConverter &converter,
                   Fortran::lower::SymMap &symTable,
                   Fortran::semantics::SemanticsContext &semaCtx,
-                  Fortran::lower::pft::Evaluation &eval, bool genNested,
+                  Fortran::lower::pft::Evaluation &eval,
                   mlir::omp::TargetOp &targetOp,
                   llvm::ArrayRef<const Fortran::semantics::Symbol *> mapSyms,
                   llvm::ArrayRef<mlir::Location> mapSymLocs,
                   llvm::ArrayRef<mlir::Type> mapSymTypes,
-                  const mlir::Location &currentLocation) {
+                  const mlir::Location &currentLocation,
+                  const ConstructQueue &queue, ConstructQueue::iterator item) {
   assert(mapSymTypes.size() == mapSymLocs.size());
 
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
@@ -981,15 +995,22 @@ genBodyOfTargetOp(Fortran::lower::AbstractConverter &converter,
 
   // Create the insertion point after the marker.
   firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
-  if (genNested)
+
+  if (ConstructQueue::iterator next = std::next(item); next != queue.end()) {
+    genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                   next);
+  } else {
     genNestedEvaluations(converter, eval);
+  }
 }
 
 template <typename OpTy, typename... Args>
-static OpTy genOpWithBody(OpWithBodyGenInfo &info, Args &&...args) {
+static OpTy genOpWithBody(const OpWithBodyGenInfo &info,
+                          const ConstructQueue &queue,
+                          ConstructQueue::iterator item, Args &&...args) {
   auto op = info.converter.getFirOpBuilder().create<OpTy>(
       info.loc, std::forward<Args>(args)...);
-  createBodyOfOp(*op, info);
+  createBodyOfOp(*op, info, queue, item);
   return op;
 }
 
@@ -1274,7 +1295,8 @@ static mlir::omp::BarrierOp
 genBarrierOp(Fortran::lower::AbstractConverter &converter,
              Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
-             Fortran::lower::pft::Evaluation &eval, mlir::Location loc) {
+             Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+             const ConstructQueue &queue, ConstructQueue::iterator item) {
   return converter.getFirOpBuilder().create<mlir::omp::BarrierOp>(loc);
 }
 
@@ -1282,8 +1304,9 @@ static mlir::omp::CriticalOp
 genCriticalOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
-              Fortran::lower::pft::Evaluation &eval, bool genNested,
-              mlir::Location loc, const List<Clause> &clauses,
+              Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item,
               const std::optional<Fortran::parser::Name> &name) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   mlir::FlatSymbolRefAttr nameAttr;
@@ -1306,17 +1329,17 @@ genCriticalOp(Fortran::lower::AbstractConverter &converter,
 
   return genOpWithBody<mlir::omp::CriticalOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
-                        llvm::omp::Directive::OMPD_critical)
-          .setGenNested(genNested),
-      nameAttr);
+                        llvm::omp::Directive::OMPD_critical),
+      queue, item, nameAttr);
 }
 
 static mlir::omp::DistributeOp
 genDistributeOp(Fortran::lower::AbstractConverter &converter,
                 Fortran::lower::SymMap &symTable,
                 Fortran::semantics::SemanticsContext &semaCtx,
-                Fortran::lower::pft::Evaluation &eval, bool genNested,
-                mlir::Location loc, const List<Clause> &clauses) {
+                Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+                const List<Clause> &clauses, const ConstructQueue &queue,
+                ConstructQueue::iterator item) {
   TODO(loc, "Distribute construct");
   return nullptr;
 }
@@ -1326,7 +1349,8 @@ genFlushOp(Fortran::lower::AbstractConverter &converter,
            Fortran::lower::SymMap &symTable,
            Fortran::semantics::SemanticsContext &semaCtx,
            Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-           const ObjectList &objects, const List<Clause> &clauses) {
+           const ObjectList &objects, const List<Clause> &clauses,
+           const ConstructQueue &queue, ConstructQueue::iterator item) {
   llvm::SmallVector<mlir::Value> operandRange;
   genFlushClauses(converter, semaCtx, objects, clauses, loc, operandRange);
 
@@ -1338,12 +1362,13 @@ static mlir::omp::MasterOp
 genMasterOp(Fortran::lower::AbstractConverter &converter,
             Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
-            Fortran::lower::pft::Evaluation &eval, bool genNested,
-            mlir::Location loc) {
+            Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+            const List<Clause> &clauses, const ConstructQueue &queue,
+            ConstructQueue::iterator item) {
   return genOpWithBody<mlir::omp::MasterOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
-                        llvm::omp::Directive::OMPD_master)
-          .setGenNested(genNested));
+                        llvm::omp::Directive::OMPD_master),
+      queue, item);
 }
 
 static mlir::omp::OrderedOp
@@ -1351,7 +1376,8 @@ genOrderedOp(Fortran::lower::AbstractConverter &converter,
              Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
              Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-             const List<Clause> &clauses) {
+             const List<Clause> &clauses, const ConstructQueue &queue,
+             ConstructQueue::iterator item) {
   TODO(loc, "OMPD_ordered");
   return nullptr;
 }
@@ -1360,25 +1386,25 @@ static mlir::omp::OrderedRegionOp
 genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
                    Fortran::lower::SymMap &symTable,
                    Fortran::semantics::SemanticsContext &semaCtx,
-                   Fortran::lower::pft::Evaluation &eval, bool genNested,
-                   mlir::Location loc, const List<Clause> &clauses) {
+                   Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+                   const List<Clause> &clauses, const ConstructQueue &queue,
+                   ConstructQueue::iterator item) {
   mlir::omp::OrderedRegionClauseOps clauseOps;
   genOrderedRegionClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::OrderedRegionOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
-                        llvm::omp::Directive::OMPD_ordered)
-          .setGenNested(genNested),
-      clauseOps);
+                        llvm::omp::Directive::OMPD_ordered),
+      queue, item, clauseOps);
 }
 
 static mlir::omp::ParallelOp
 genParallelOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
-              Fortran::lower::pft::Evaluation &eval, bool genNested,
-              mlir::Location loc, const List<Clause> &clauses,
-              bool outerCombined = false) {
+              Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item, bool outerCombined = false) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   Fortran::lower::StatementContext stmtCtx;
   mlir::omp::ParallelClauseOps clauseOps;
@@ -1397,14 +1423,14 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
   OpWithBodyGenInfo genInfo =
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_parallel)
-          .setGenNested(genNested)
           .setOuterCombined(outerCombined)
           .setClauses(&clauses)
           .setReductions(&reductionSyms, &reductionTypes)
           .setGenRegionEntryCb(reductionCallback);
 
   if (!enableDelayedPrivatization)
-    return genOpWithBody<mlir::omp::ParallelOp>(genInfo, clauseOps);
+    return genOpWithBody<mlir::omp::ParallelOp>(genInfo, queue, item,
+                                                clauseOps);
 
   bool privatize = !outerCombined;
   DataSharingProcessor dsp(converter, semaCtx, clauses, eval,
@@ -1447,22 +1473,23 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
 
   // TODO Merge with the reduction CB.
   genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp);
-  return genOpWithBody<mlir::omp::ParallelOp>(genInfo, clauseOps);
+  return genOpWithBody<mlir::omp::ParallelOp>(genInfo, queue, item, clauseOps);
 }
 
 static mlir::omp::SectionOp
 genSectionOp(Fortran::lower::AbstractConverter &converter,
              Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
-             Fortran::lower::pft::Evaluation &eval, bool genNested,
-             mlir::Location loc, const List<Clause> &clauses) {
+             Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+             const List<Clause> &clauses, const ConstructQueue &queue,
+             ConstructQueue::iterator item) {
   // Currently only private/firstprivate clause is handled, and
   // all privatization is done within `omp.section` operations.
   return genOpWithBody<mlir::omp::SectionOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_section)
-          .setGenNested(genNested)
-          .setClauses(&clauses));
+          .setClauses(&clauses),
+      queue, item);
 }
 
 static mlir::omp::SectionsOp
@@ -1470,12 +1497,14 @@ genSectionsOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-              const mlir::omp::SectionsClauseOps &clauseOps) {
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item) {
+  mlir::omp::SectionsClauseOps clauseOps;
+  genSectionsClauses(converter, semaCtx, clauses, loc, clauseOps);
   return genOpWithBody<mlir::omp::SectionsOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
-                        llvm::omp::Directive::OMPD_sections)
-          .setGenNested(false),
-      clauseOps);
+                        llvm::omp::Directive::OMPD_sections),
+      queue, item, clauseOps);
 }
 
 static mlir::omp::SimdOp
@@ -1483,7 +1512,8 @@ genSimdOp(Fortran::lower::AbstractConverter &converter,
           Fortran::lower::SymMap &symTable,
           Fortran::semantics::SemanticsContext &semaCtx,
           Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-          const List<Clause> &clauses) {
+          const List<Clause> &clauses, const ConstructQueue &queue,
+          ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   DataSharingProcessor dsp(converter, semaCtx, clauses, eval);
   dsp.processStep1();
@@ -1518,7 +1548,8 @@ genSimdOp(Fortran::lower::AbstractConverter &converter,
                                    *nestedEval, llvm::omp::Directive::OMPD_simd)
                      .setClauses(&clauses)
                      .setDataSharingProcessor(&dsp)
-                     .setGenRegionEntryCb(ivCallback));
+                     .setGenRegionEntryCb(ivCallback),
+                 queue, item);
 
   return simdOp;
 }
@@ -1527,26 +1558,26 @@ static mlir::omp::SingleOp
 genSingleOp(Fortran::lower::AbstractConverter &converter,
             Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
-            Fortran::lower::pft::Evaluation &eval, bool genNested,
-            mlir::Location loc, const List<Clause> &clauses) {
+            Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+            const List<Clause> &clauses, const ConstructQueue &queue,
+            ConstructQueue::iterator item) {
   mlir::omp::SingleClauseOps clauseOps;
   genSingleClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::SingleOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_single)
-          .setGenNested(genNested)
           .setClauses(&clauses),
-      clauseOps);
+      queue, item, clauseOps);
 }
 
 static mlir::omp::TargetOp
 genTargetOp(Fortran::lower::AbstractConverter &converter,
             Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
-            Fortran::lower::pft::Evaluation &eval, bool genNested,
-            mlir::Location loc, const List<Clause> &clauses,
-            bool outerCombined = false) {
+            Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+            const List<Clause> &clauses, const ConstructQueue &queue,
+            ConstructQueue::iterator item, bool outerCombined = false) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   Fortran::lower::StatementContext stmtCtx;
 
@@ -1650,8 +1681,8 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
   Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap);
 
   auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
-  genBodyOfTargetOp(converter, symTable, semaCtx, eval, genNested, targetOp,
-                    mapSyms, mapLocs, mapTypes, loc);
+  genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, mapSyms,
+                    mapLocs, mapTypes, loc, queue, item);
   return targetOp;
 }
 
@@ -1659,8 +1690,9 @@ static mlir::omp::TargetDataOp
 genTargetDataOp(Fortran::lower::AbstractConverter &converter,
                 Fortran::lower::SymMap &symTable,
                 Fortran::semantics::SemanticsContext &semaCtx,
-                Fortran::lower::pft::Evaluation &eval, bool genNested,
-                mlir::Location loc, const List<Clause> &clauses) {
+                Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+                const List<Clause> &clauses, const ConstructQueue &queue,
+                ConstructQueue::iterator item) {
   Fortran::lower::StatementContext stmtCtx;
   mlir::omp::TargetDataClauseOps clauseOps;
   llvm::SmallVector<mlir::Type> useDeviceTypes;
@@ -1672,9 +1704,9 @@ genTargetDataOp(Fortran::lower::AbstractConverter &converter,
   auto targetDataOp =
       converter.getFirOpBuilder().create<mlir::omp::TargetDataOp>(loc,
                                                                   clauseOps);
-  genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, genNested,
-                        targetDataOp, useDeviceTypes, useDeviceLocs,
-                        useDeviceSyms, loc);
+  genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, targetDataOp,
+                        useDeviceTypes, useDeviceLocs, useDeviceSyms, loc,
+                        queue, item);
   return targetDataOp;
 }
 
@@ -1683,8 +1715,9 @@ static OpTy
 genTargetEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter,
                                Fortran::lower::SymMap &symTable,
                                Fortran::semantics::SemanticsContext &semaCtx,
-                               mlir::Location loc,
-                               const List<Clause> &clauses) {
+                               mlir::Location loc, const List<Clause> &clauses,
+                               const ConstructQueue &queue,
+                               ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   Fortran::lower::StatementContext stmtCtx;
 
@@ -1711,8 +1744,9 @@ static mlir::omp::TaskOp
 genTaskOp(Fortran::lower::AbstractConverter &converter,
           Fortran::lower::SymMap &symTable,
           Fortran::semantics::SemanticsContext &semaCtx,
-          Fortran::lower::pft::Evaluation &eval, bool genNested,
-          mlir::Location loc, const List<Clause> &clauses) {
+          Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+          const List<Clause> &clauses, const ConstructQueue &queue,
+          ConstructQueue::iterator item) {
   Fortran::lower::StatementContext stmtCtx;
   mlir::omp::TaskClauseOps clauseOps;
   genTaskClauses(converter, semaCtx, stmtCtx, clauses, loc, clauseOps);
@@ -1720,26 +1754,25 @@ genTaskOp(Fortran::lower::AbstractConverter &converter,
   return genOpWithBody<mlir::omp::TaskOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_task)
-          .setGenNested(genNested)
           .setClauses(&clauses),
-      clauseOps);
+      queue, item, clauseOps);
 }
 
 static mlir::omp::TaskgroupOp
 genTaskgroupOp(Fortran::lower::AbstractConverter &converter,
                Fortran::lower::SymMap &symTable,
                Fortran::semantics::SemanticsContext &semaCtx,
-               Fortran::lower::pft::Evaluation &eval, bool genNested,
-               mlir::Location loc, const List<Clause> &clauses) {
+               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+               const List<Clause> &clauses, const ConstructQueue &queue,
+               ConstructQueue::iterator item) {
   mlir::omp::TaskgroupClauseOps clauseOps;
   genTaskgroupClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::TaskgroupOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_taskgroup)
-          .setGenNested(genNested)
           .setClauses(&clauses),
-      clauseOps);
+      queue, item, clauseOps);
 }
 
 static mlir::omp::TaskloopOp
@@ -1747,7 +1780,8 @@ genTaskloopOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-              const List<Clause> &clauses) {
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item) {
   TODO(loc, "Taskloop construct");
 }
 
@@ -1756,7 +1790,8 @@ genTaskwaitOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-              const List<Clause> &clauses) {
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item) {
   mlir::omp::TaskwaitClauseOps clauseOps;
   genTaskwaitClauses(converter, semaCtx, clauses, loc, clauseOps);
   return converter.getFirOpBuilder().create<mlir::omp::TaskwaitOp>(loc,
@@ -1767,7 +1802,8 @@ static mlir::omp::TaskyieldOp
 genTaskyieldOp(Fortran::lower::AbstractConverter &converter,
                Fortran::lower::SymMap &symTable,
                Fortran::semantics::SemanticsContext &semaCtx,
-               Fortran::lower::pft::Evaluation &eval, mlir::Location loc) {
+               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+               const ConstructQueue &queue, ConstructQueue::iterator item) {
   return converter.getFirOpBuilder().create<mlir::omp::TaskyieldOp>(loc);
 }
 
@@ -1775,9 +1811,9 @@ static mlir::omp::TeamsOp
 genTeamsOp(Fortran::lower::AbstractConverter &converter,
            Fortran::lower::SymMap &symTable,
            Fortran::semantics::SemanticsContext &semaCtx,
-           Fortran::lower::pft::Evaluation &eval, bool genNested,
-           mlir::Location loc, const List<Clause> &clauses,
-           bool outerCombined = false) {
+           Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+           const List<Clause> &clauses, const ConstructQueue &queue,
+           ConstructQueue::iterator item, bool outerCombined = false) {
   Fortran::lower::StatementContext stmtCtx;
   mlir::omp::TeamsClauseOps clauseOps;
   genTeamsClauses(converter, semaCtx, stmtCtx, clauses, loc, clauseOps);
@@ -1785,10 +1821,9 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter,
   return genOpWithBody<mlir::omp::TeamsOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_teams)
-          .setGenNested(genNested)
           .setOuterCombined(outerCombined)
           .setClauses(&clauses),
-      clauseOps);
+      queue, item, clauseOps);
 }
 
 static mlir::omp::WsloopOp
@@ -1796,7 +1831,8 @@ genWsloopOp(Fortran::lower::AbstractConverter &converter,
             Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
             Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-            const List<Clause> &clauses) {
+            const List<Clause> &clauses, const ConstructQueue &queue,
+            ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   DataSharingProcessor dsp(converter, semaCtx, clauses, eval);
   dsp.processStep1();
@@ -1837,7 +1873,8 @@ genWsloopOp(Fortran::lower::AbstractConverter &converter,
                      .setClauses(&clauses)
                      .setDataSharingProcessor(&dsp)
                      .setReductions(&reductionSyms, &reductionTypes)
-                     .setGenRegionEntryCb(ivCallback));
+                     .setGenRegionEntryCb(ivCallback),
+                 queue, item);
   return wsloopOp;
 }
 
@@ -1845,13 +1882,13 @@ genWsloopOp(Fortran::lower::AbstractConverter &converter,
 // Code generation functions for composite constructs
 //===----------------------------------------------------------------------===//
 
-static void
-genCompositeDistributeParallelDo(Fortran::lower::AbstractConverter &converter,
-                                 Fortran::lower::SymMap &symTable,
-                                 Fortran::semantics::SemanticsContext &semaCtx,
-                                 Fortran::lower::pft::Evaluation &eval,
-                                 const List<Clause> &clauses,
-                                 mlir::Location loc) {
+static void genCompositeDistributeParallelDo(
+    Fortran::lower::AbstractConverter &converter,
+    Fortran::lower::SymMap &symTable,
+    Fortran::semantics::SemanticsContext &semaCtx,
+    Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+    const List<Clause> &clauses, const ConstructQueue &queue,
+    ConstructQueue::iterator item) {
   TODO(loc, "Composite DISTRIBUTE PARALLEL DO");
 }
 
@@ -1859,8 +1896,9 @@ static void genCompositeDistributeParallelDoSimd(
     Fortran::lower::AbstractConverter &converter,
     Fortran::lower::SymMap &symTable,
     Fortran::semantics::SemanticsContext &semaCtx,
-    Fortran::lower::pft::Evaluation &eval, const List<Clause> &clauses,
-    mlir::Location loc) {
+    Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+    const List<Clause> &clauses, const ConstructQueue &queue,
+    ConstructQueue::iterator item) {
   TODO(loc, "Composite DISTRIBUTE PARALLEL DO SIMD");
 }
 
@@ -1869,7 +1907,9 @@ genCompositeDistributeSimd(Fortran::lower::AbstractConverter &converter,
                            Fortran::lower::SymMap &symTable,
                            Fortran::semantics::SemanticsContext &semaCtx,
                            Fortran::lower::pft::Evaluation &eval,
-                           const List<Clause> &clauses, mlir::Location loc) {
+                           mlir::Location loc, const List<Clause> &clauses,
+                           const ConstructQueue &queue,
+                           ConstructQueue::iterator item) {
   TODO(loc, "Composite DISTRIBUTE SIMD");
 }
 
@@ -1877,8 +1917,9 @@ static void genCompositeDoSimd(Fortran::lower::AbstractConverter &converter,
                                Fortran::lower::SymMap &symTable,
                                Fortran::semantics::SemanticsContext &semaCtx,
                                Fortran::lower::pft::Evaluation &eval,
-                               const List<Clause> &clauses,
-                               mlir::Location loc) {
+                               mlir::Location loc, const List<Clause> &clauses,
+                               const ConstructQueue &queue,
+                               ConstructQueue::iterator item) {
   ClauseProcessor cp(converter, semaCtx, clauses);
   cp.processTODO<clause::Aligned, clause::Allocate, clause::Linear,
                  clause::Order, clause::Safelen, clause::Simdlen>(
@@ -1891,7 +1932,7 @@ static void genCompositeDoSimd(Fortran::lower::AbstractConverter &converter,
   // When support for vectorization is enabled, then we need to add handling of
   // if clause. Currently if clause can be skipped because we always assume
   // SIMD length = 1.
-  genWsloopOp(converter, symTable, semaCtx, eval, loc, clauses);
+  genWsloopOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
 }
 
 static void
@@ -1899,10 +1940,126 @@ genCompositeTaskloopSimd(Fortran::lower::AbstractConverter &converter,
                          Fortran::lower::SymMap &symTable,
                          Fortran::semantics::SemanticsContext &semaCtx,
                          Fortran::lower::pft::Evaluation &eval,
-                         const List<Clause> &clauses, mlir::Location loc) {
+                         mlir::Location loc, const List<Clause> &clauses,
+                         const ConstructQueue &queue,
+                         ConstructQueue::iterator item) {
   TODO(loc, "Composite TASKLOOP SIMD");
 }
 
+//===----------------------------------------------------------------------===//
+// Dispatch
+//===----------------------------------------------------------------------===//
+
+static void genOMPDispatch(Fortran::lower::AbstractConverter &converter,
+                           Fortran::lower::SymMap &symTable,
+                           Fortran::semantics::SemanticsContext &semaCtx,
+                           Fortran::lower::pft::Evaluation &eval,
+                           mlir::Location loc, const ConstructQueue &queue,
+                           ConstructQueue::iterator item) {
+  assert(item != queue.end());
+  const List<Clause> &clauses = item->clauses;
+
+  switch (llvm::omp::Directive dir = item->id) {
+  case llvm::omp::Directive::OMPD_distribute:
+    genDistributeOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                    item);
+    break;
+  case llvm::omp::Directive::OMPD_do:
+    genWsloopOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_loop:
+  case llvm::omp::Directive::OMPD_masked:
+    TODO(loc, "Unhandled loop directive (" +
+                  llvm::omp::getOpenMPDirectiveName(dir) + ")");
+    break;
+  case llvm::omp::Directive::OMPD_master:
+    genMasterOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_ordered:
+    genOrderedRegionOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                       item);
+    break;
+  case llvm::omp::Directive::OMPD_parallel:
+    genParallelOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item,
+                  /*outerCombined=*/false);
+    break;
+  case llvm::omp::Directive::OMPD_sections:
+    genSectionsOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                  item);
+    break;
+  case llvm::omp::Directive::OMPD_simd:
+    genSimdOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_single:
+    genSingleOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_target:
+    genTargetOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item,
+                /*outerCombined=*/false);
+    break;
+  case llvm::omp::Directive::OMPD_target_data:
+    genTargetDataOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                    item);
+    break;
+  case llvm::omp::Directive::OMPD_target_enter_data:
+    genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>(
+        converter, symTable, semaCtx, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_target_exit_data:
+    genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>(
+        converter, symTable, semaCtx, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_target_update:
+    genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>(
+        converter, symTable, semaCtx, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_task:
+    genTaskOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_taskgroup:
+    genTaskgroupOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                   item);
+    break;
+  case llvm::omp::Directive::OMPD_taskloop:
+    genTaskloopOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                  item);
+    break;
+  case llvm::omp::Directive::OMPD_teams:
+    genTeamsOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  // case llvm::omp::Directive::OMPD_workdistribute:
+  case llvm::omp::Directive::OMPD_workshare:
+    // FIXME: Workshare is not a commonly used OpenMP construct, an
+    // implementation for this feature will come later. For the codes
+    // that use this construct, add a single construct for now.
+    genSingleOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  // Composite constructs
+  case llvm::omp::Directive::OMPD_distribute_parallel_do:
+    genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc,
+                                     clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_distribute_parallel_do_simd:
+    genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval,
+                                         loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_distribute_simd:
+    genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, clauses,
+                               queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_do_simd:
+    genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                       item);
+    break;
+  case llvm::omp::Directive::OMPD_taskloop_simd:
+    genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, clauses,
+                             queue, item);
+    break;
+  default:
+    break;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // OpenMPDeclarativeConstruct visitors
 //===----------------------------------------------------------------------===//
@@ -2013,36 +2170,45 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
       semaCtx);
   mlir::Location currentLocation = converter.genLocation(directive.source);
 
+  ConstructQueue queue{{DirectiveWithClauses{directive.v, clauses}}};
+
   switch (directive.v) {
   default:
     break;
   case llvm::omp::Directive::OMPD_barrier:
-    genBarrierOp(converter, symTable, semaCtx, eval, currentLocation);
+    genBarrierOp(converter, symTable, semaCtx, eval, currentLocation, queue,
+                 queue.begin());
     break;
   case llvm::omp::Directive::OMPD_taskwait:
-    genTaskwaitOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
+    genTaskwaitOp(converter, symTable, semaCtx, eval, currentLocation, clauses,
+                  queue, queue.begin());
     break;
   case llvm::omp::Directive::OMPD_taskyield:
-    genTaskyieldOp(converter, symTable, semaCtx, eval, currentLocation);
+    genTaskyieldOp(converter, symTable, semaCtx, eval, currentLocation, queue,
+                   queue.begin());
     break;
   case llvm::omp::Directive::OMPD_target_data:
-    genTargetDataOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
-                    currentLocation, clauses);
+    genTargetDataOp(converter, symTable, semaCtx, eval, currentLocation,
+                    clauses, queue, queue.begin());
     break;
   case llvm::omp::Directive::OMPD_target_enter_data:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>(
-        converter, symTable, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses, queue,
+        queue.begin());
     break;
   case llvm::omp::Directive::OMPD_target_exit_data:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>(
-        converter, symTable, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses, queue,
+        queue.begin());
     break;
   case llvm::omp::Directive::OMPD_target_update:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>(
-        converter, symTable, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses, queue,
+        queue.begin());
     break;
   case llvm::omp::Directive::OMPD_ordered:
-    genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
+    genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, clauses,
+                 queue, queue.begin());
     break;
   }
 }
@@ -2066,8 +2232,11 @@ genOMP(Fortran::lower::AbstractConverter &converter,
                             [&](auto &&s) { return makeClause(s.v, semaCtx); })
                  : List<Clause>{};
   mlir::Location currentLocation = converter.genLocation(verbatim.source);
+
+  ConstructQueue queue{
+      DirectiveWithClauses{llvm::omp::Directive::OMPD_flush, clauses}};
   genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects,
-             clauses);
+             clauses, queue, queue.begin());
 }
 
 static void
@@ -2210,75 +2379,13 @@ genOMP(Fortran::lower::AbstractConverter &converter,
     }
   }
 
-  std::optional<llvm::omp::Directive> nextDir = origDirective;
-  bool outermostLeafConstruct = true;
-  while (nextDir) {
-    llvm::omp::Directive leafDir;
-    std::tie(leafDir, nextDir) = splitCombinedDirective(*nextDir);
-    const bool genNested = !nextDir;
-    const bool outerCombined = outermostLeafConstruct && nextDir.has_value();
-    switch (leafDir) {
-    case llvm::omp::Directive::OMPD_master:
-      // 2.16 MASTER construct.
-      genMasterOp(converter, symTable, semaCtx, eval, genNested,
-                  currentLocation);
-      break;
-    case llvm::omp::Directive::OMPD_ordered:
-      // 2.17.9 ORDERED construct.
-      genOrderedRegionOp(converter, symTable, semaCtx, eval, genNested,
-                         currentLocation, clauses);
-      break;
-    case llvm::omp::Directive::OMPD_parallel:
-      // 2.6 PARALLEL construct.
-      genParallelOp(converter, symTable, semaCtx, eval, genNested,
-                    currentLocation, clauses, outerCombined);
-      break;
-    case llvm::omp::Directive::OMPD_single:
-      // 2.8.2 SINGLE construct.
-      genSingleOp(converter, symTable, semaCtx, eval, genNested,
-                  currentLocation, clauses);
-      break;
-    case llvm::omp::Directive::OMPD_target:
-      // 2.12.5 TARGET construct.
-      genTargetOp(converter, symTable, semaCtx, eval, genNested,
-                  currentLocation, clauses, outerCombined);
-      break;
-    case llvm::omp::Directive::OMPD_target_data:
-      // 2.12.2 TARGET DATA construct.
-      genTargetDataOp(converter, symTable, semaCtx, eval, genNested,
-                      currentLocation, clauses);
-      break;
-    case llvm::omp::Directive::OMPD_task:
-      // 2.10.1 TASK construct.
-      genTaskOp(converter, symTable, semaCtx, eval, genNested, currentLocation,
-                clauses);
-      break;
-    case llvm::omp::Directive::OMPD_taskgroup:
-      // 2.17.6 TASKGROUP construct.
-      genTaskgroupOp(converter, symTable, semaCtx, eval, genNested,
-                     currentLocation, clauses);
-      break;
-    case llvm::omp::Directive::OMPD_teams:
-      // 2.7 TEAMS construct.
-      // FIXME Pass the outerCombined argument or rename it to better describe
-      // what it represents if it must always be `false` in this context.
-      genTeamsOp(converter, symTable, semaCtx, eval, genNested, currentLocation,
-                 clauses);
-      break;
-    case llvm::omp::Directive::OMPD_workshare:
-      // 2.8.3 WORKSHARE construct.
-      // FIXME: Workshare is not a commonly used OpenMP construct, an
-      // implementation for this feature will come later. For the codes
-      // that use this construct, add a single construct for now.
-      genSingleOp(converter, symTable, semaCtx, eval, genNested,
-                  currentLocation, clauses);
-      break;
-    default:
-      llvm_unreachable("Unexpected block construct");
-      break;
-    }
-    outermostLeafConstruct = false;
-  }
+  llvm::omp::Directive directive =
+      std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v;
+  ConstructQueue queue;
+  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
+                         directive, clauses, queue);
+  genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                 queue.begin());
 }
 
 static void
@@ -2291,10 +2398,15 @@ genOMP(Fortran::lower::AbstractConverter &converter,
       std::get<Fortran::parser::OmpCriticalDirective>(criticalConstruct.t);
   List<Clause> clauses =
       makeClauses(std::get<Fortran::parser::OmpClauseList>(cd.t), semaCtx);
+
+  ConstructQueue queue;
+  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
+                         llvm::omp::Directive::OMPD_critical, clauses, queue);
+
   const auto &name = std::get<std::optional<Fortran::parser::Name>>(cd.t);
   mlir::Location currentLocation = converter.getCurrentLocation();
-  genCriticalOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
-                currentLocation, clauses, name);
+  genCriticalOp(converter, symTable, semaCtx, eval, currentLocation, clauses,
+                queue, queue.begin(), name);
 }
 
 static void
@@ -2315,14 +2427,6 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
       std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t);
   List<Clause> clauses = makeClauses(
       std::get<Fortran::parser::OmpClauseList>(beginLoopDirective.t), semaCtx);
-  mlir::Location currentLocation =
-      converter.genLocation(beginLoopDirective.source);
-  const auto origDirective =
-      std::get<Fortran::parser::OmpLoopDirective>(beginLoopDirective.t).v;
-
-  assert(llvm::omp::loopConstructSet.test(origDirective) &&
-         "Expected loop construct");
-
   if (auto &endLoopDirective =
           std::get<std::optional<Fortran::parser::OmpEndLoopDirective>>(
               loopConstruct.t)) {
@@ -2331,101 +2435,16 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
         semaCtx));
   }
 
-  std::optional<llvm::omp::Directive> nextDir = origDirective;
-  while (nextDir) {
-    llvm::omp::Directive leafDir;
-    std::tie(leafDir, nextDir) = splitCombinedDirective(*nextDir);
-    if (llvm::omp::compositeConstructSet.test(leafDir)) {
-      assert(!nextDir && "Composite construct cannot be split");
-      switch (leafDir) {
-      case llvm::omp::Directive::OMPD_distribute_parallel_do:
-        // 2.9.4.3 DISTRIBUTE PARALLEL Worksharing-Loop construct.
-        genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval,
-                                         clauses, currentLocation);
-        break;
-      case llvm::omp::Directive::OMPD_distribute_parallel_do_simd:
-        // 2.9.4.4 DISTRIBUTE PARALLEL Worksharing-Loop SIMD construct.
-        genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval,
-                                             clauses, currentLocation);
-        break;
-      case llvm::omp::Directive::OMPD_distribute_simd:
-        // 2.9.4.2 DISTRIBUTE SIMD construct.
-        genCompositeDistributeSimd(converter, symTable, semaCtx, eval, clauses,
-                                   currentLocation);
-        break;
-      case llvm::omp::Directive::OMPD_do_simd:
-        // 2.9.3.2 Worksharing-Loop SIMD construct.
-        genCompositeDoSimd(converter, symTable, semaCtx, eval, clauses,
-                           currentLocation);
-        break;
-      case llvm::omp::Directive::OMPD_taskloop_simd:
-        // 2.10.3 TASKLOOP SIMD construct.
-        genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, clauses,
-                                 currentLocation);
-        break;
-      default:
-        llvm_unreachable("Unexpected composite construct");
-      }
-    } else {
-      const bool genNested = !nextDir;
-      switch (leafDir) {
-      case llvm::omp::Directive::OMPD_distribute:
-        // 2.9.4.1 DISTRIBUTE construct.
-        genDistributeOp(converter, symTable, semaCtx, eval, genNested,
-                        currentLocation, clauses);
-        break;
-      case llvm::omp::Directive::OMPD_do:
-        // 2.9.2 Worksharing-Loop construct.
-        genWsloopOp(converter, symTable, semaCtx, eval, currentLocation,
-                    clauses);
-        break;
-      case llvm::omp::Directive::OMPD_parallel:
-        // 2.6 PARALLEL construct.
-        // FIXME This is not necessarily always the outer leaf construct of a
-        // combined construct in this constext (e.g. distribute parallel do).
-        // Maybe rename the argument if it represents something else or
-        // initialize it properly.
-        genParallelOp(converter, symTable, semaCtx, eval, genNested,
-                      currentLocation, clauses,
-                      /*outerCombined=*/true);
-        break;
-      case llvm::omp::Directive::OMPD_simd:
-        // 2.9.3.1 SIMD construct.
-        genSimdOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
-        break;
-      case llvm::omp::Directive::OMPD_target:
-        // 2.12.5 TARGET construct.
-        genTargetOp(converter, symTable, semaCtx, eval, genNested,
-                    currentLocation, clauses, /*outerCombined=*/true);
-        break;
-      case llvm::omp::Directive::OMPD_taskloop:
-        // 2.10.2 TASKLOOP construct.
-        genTaskloopOp(converter, symTable, semaCtx, eval, currentLocation,
-                      clauses);
-        break;
-      case llvm::omp::Directive::OMPD_teams:
-        // 2.7 TEAMS construct.
-        // FIXME This is not necessarily always the outer leaf construct of a
-        // combined construct in this constext (e.g. target teams distribute).
-        // Maybe rename the argument if it represents something else or
-        // initialize it properly.
-        genTeamsOp(converter, symTable, semaCtx, eval, genNested,
-                   currentLocation, clauses, /*outerCombined=*/true);
-        break;
-      case llvm::omp::Directive::OMPD_loop:
-      case llvm::omp::Directive::OMPD_masked:
-      case llvm::omp::Directive::OMPD_master:
-      case llvm::omp::Directive::OMPD_tile:
-      case llvm::omp::Directive::OMPD_unroll:
-        TODO(currentLocation, "Unhandled loop directive (" +
-                                  llvm::omp::getOpenMPDirectiveName(leafDir) +
-                                  ")");
-        break;
-      default:
-        llvm_unreachable("Unexpected loop construct");
-      }
-    }
-  }
+  mlir::Location currentLocation =
+      converter.genLocation(beginLoopDirective.source);
+
+  llvm::omp::Directive directive =
+      std::get<parser::OmpLoopDirective>(beginLoopDirective.t).v;
+  ConstructQueue queue;
+  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
+                         directive, clauses, queue);
+  genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                 queue.begin());
 }
 
 static void
@@ -2434,8 +2453,11 @@ genOMP(Fortran::lower::AbstractConverter &converter,
        Fortran::semantics::SemanticsContext &semaCtx,
        Fortran::lower::pft::Evaluation &eval,
        const Fortran::parser::OpenMPSectionConstruct &sectionConstruct) {
-  // SECTION constructs are handled as a part of SECTIONS.
-  llvm_unreachable("Unexpected standalone OMP SECTION");
+  mlir::Location loc = converter.getCurrentLocation();
+  ConstructQueue queue{
+      DirectiveWithClauses{llvm::omp::Directive::OMPD_section}};
+  genSectionOp(converter, symTable, semaCtx, eval, loc,
+               /*clauses=*/{}, queue, queue.begin());
 }
 
 static void
@@ -2454,39 +2476,15 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   clauses.append(makeClauses(
       std::get<Fortran::parser::OmpClauseList>(endSectionsDirective.t),
       semaCtx));
-
-  // Process clauses before optional omp.parallel, so that new variables are
-  // allocated outside of the parallel region
   mlir::Location currentLocation = converter.getCurrentLocation();
-  mlir::omp::SectionsClauseOps clauseOps;
-  genSectionsClauses(converter, semaCtx, clauses, currentLocation, clauseOps);
-
-  // Parallel wrapper of PARALLEL SECTIONS construct
-  llvm::omp::Directive dir =
-      std::get<Fortran::parser::OmpSectionsDirective>(beginSectionsDirective.t)
-          .v;
-  if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
-    genParallelOp(converter, symTable, semaCtx, eval,
-                  /*genNested=*/false, currentLocation, clauses,
-                  /*outerCombined=*/true);
-  }
 
-  // SECTIONS construct.
-  genSectionsOp(converter, symTable, semaCtx, eval, currentLocation, clauseOps);
-
-  // Generate nested SECTION operations recursively.
-  const auto &sectionBlocks =
-      std::get<Fortran::parser::OmpSectionBlocks>(sectionsConstruct.t);
-  auto &firOpBuilder = converter.getFirOpBuilder();
-  auto ip = firOpBuilder.saveInsertionPoint();
-  for (const auto &[nblock, neval] :
-       llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) {
-    symTable.pushScope();
-    genSectionOp(converter, symTable, semaCtx, neval, /*genNested=*/true,
-                 currentLocation, clauses);
-    symTable.popScope();
-    firOpBuilder.restoreInsertionPoint(ip);
-  }
+  llvm::omp::Directive directive =
+      std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).v;
+  ConstructQueue queue;
+  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
+                         directive, clauses, queue);
+  genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                 queue.begin());
 }
 
 static void genOMP(Fortran::lower::AbstractConverter &converter,
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
new file mode 100644
index 00000000000000..f8a18a1bed7ae6
--- /dev/null
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -0,0 +1,985 @@
+#ifndef LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
+#define LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Frontend/OpenMP/ClauseT.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
+
+#include <iterator>
+#include <list>
+#include <optional>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <variant>
+
+static inline llvm::ArrayRef<llvm::omp::Directive> getWorksharing() {
+  static llvm::omp::Directive worksharing[] = {
+      llvm::omp::Directive::OMPD_do,     llvm::omp::Directive::OMPD_for,
+      llvm::omp::Directive::OMPD_scope,  llvm::omp::Directive::OMPD_sections,
+      llvm::omp::Directive::OMPD_single, llvm::omp::Directive::OMPD_workshare,
+  };
+  return worksharing;
+}
+
+static inline llvm::ArrayRef<llvm::omp::Directive> getWorksharingLoop() {
+  static llvm::omp::Directive worksharingLoop[] = {
+      llvm::omp::Directive::OMPD_do,
+      llvm::omp::Directive::OMPD_for,
+  };
+  return worksharingLoop;
+}
+
+namespace detail {
+template <typename Container, typename Predicate>
+typename std::remove_reference_t<Container>::iterator
+find_unique(Container &&container, Predicate &&pred) {
+  auto first = std::find_if(container.begin(), container.end(), pred);
+  if (first == container.end())
+    return first;
+  auto second = std::find_if(std::next(first), container.end(), pred);
+  if (second == container.end())
+    return first;
+  return container.end();
+}
+} // namespace detail
+
+namespace tomp {
+
+template <typename ClauseType> struct DirectiveWithClauses {
+  llvm::omp::Directive id = llvm::omp::Directive::OMPD_unknown;
+  tomp::type::ListT<ClauseType> clauses;
+};
+
+template <typename ClauseType, typename HelperType>
+struct ConstructDecompositionT {
+  using ClauseTy = ClauseType;
+
+  using TypeTy = typename ClauseTy::TypeTy;
+  using IdTy = typename ClauseTy::IdTy;
+  using ExprTy = typename ClauseTy::ExprTy;
+  using HelperTy = HelperType;
+  using ObjectTy = tomp::ObjectT<IdTy, ExprTy>;
+
+  using ClauseSet = llvm::DenseSet<const ClauseTy *>;
+
+  ConstructDecompositionT(uint32_t ver, HelperType &hlp,
+                          llvm::omp::Directive dir,
+                          llvm::ArrayRef<ClauseTy> clauses)
+      : version(ver), construct(dir), helper(hlp) {
+    for (const ClauseTy &clause : clauses)
+      nodes.push_back(&clause);
+
+    bool success = split();
+    if (success) {
+      // Copy the broken down directives with their clauses to the
+      // output list. Copy by value, since we don't own the storage
+      // with the input clauses, and the internal representation uses
+      // clause addresses.
+      for (auto &leaf : leafs) {
+        output.push_back({leaf.id});
+        auto &dwc = output.back();
+        for (const ClauseTy *c : leaf.clauses)
+          dwc.clauses.push_back(*c);
+      }
+    }
+  }
+
+  tomp::ListT<DirectiveWithClauses<ClauseType>> output;
+
+private:
+  bool split();
+
+  struct LeafReprInternal {
+    llvm::omp::Directive id = llvm::omp::Directive::OMPD_unknown;
+    tomp::type::ListT<const ClauseTy *> clauses;
+  };
+
+  LeafReprInternal *findDirective(llvm::omp::Directive dirId) {
+    auto found = llvm::find_if(
+        leafs, [&](const LeafReprInternal &leaf) { return leaf.id == dirId; });
+    return found != leafs.end() ? &*found : nullptr;
+  }
+
+  ClauseSet *findClausesWith(const ObjectTy &object) {
+    if (auto found = syms.find(object.id()); found != syms.end())
+      return &found->second;
+    return nullptr;
+  }
+
+  template <typename S>
+  ClauseTy *makeClause(llvm::omp::Clause clauseId, S &&specific) {
+    implicit.push_back(ClauseTy{clauseId, std::move(specific)});
+    return &implicit.back();
+  }
+
+  void addClauseSymsToMap(const ObjectTy &object, const ClauseTy *);
+  void addClauseSymsToMap(const tomp::ObjectListT<IdTy, ExprTy> &objects,
+                          const ClauseTy *);
+  void addClauseSymsToMap(const TypeTy &item, const ClauseTy *);
+  void addClauseSymsToMap(const ExprTy &item, const ClauseTy *);
+  void addClauseSymsToMap(const tomp::clause::MapT<TypeTy, IdTy, ExprTy> &item,
+                          const ClauseTy *);
+
+  template <typename U>
+  void addClauseSymsToMap(const std::optional<U> &item, const ClauseTy *);
+  template <typename U>
+  void addClauseSymsToMap(const tomp::ListT<U> &item, const ClauseTy *);
+  template <typename... U, size_t... Is>
+  void addClauseSymsToMap(const std::tuple<U...> &item, const ClauseTy *,
+                          std::index_sequence<Is...> = {});
+  template <typename U>
+  std::enable_if_t<std::is_enum_v<llvm::remove_cvref_t<U>>, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::EmptyTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::IncompleteTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::WrapperTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::TupleTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::UnionTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  // Apply a clause to the only directive that allows it. If there are no
+  // directives that allow it, or if there is more that one, do not apply
+  // anything and return false, otherwise return true.
+  bool applyToUnique(const ClauseTy *node);
+
+  // Apply a clause to the first directive in given range that allows it.
+  // If such a directive does not exist, return false, otherwise return true.
+  template <typename Iterator>
+  bool applyToFirst(const ClauseTy *node, llvm::iterator_range<Iterator> range);
+
+  // Apply a clause to the innermost directive that allows it. If such a
+  // directive does not exist, return false, otherwise return true.
+  bool applyToInnermost(const ClauseTy *node);
+
+  // Apply a clause to the outermost directive that allows it. If such a
+  // directive does not exist, return false, otherwise return true.
+  bool applyToOutermost(const ClauseTy *node);
+
+  template <typename Predicate>
+  bool applyIf(const ClauseTy *node, Predicate shouldApply);
+
+  bool applyToAll(const ClauseTy *node);
+
+  template <typename Clause>
+  bool applyClause(Clause &&clause, const ClauseTy *node);
+
+  bool applyClause(const tomp::clause::CollapseT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::PrivateT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool
+  applyClause(const tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy> &clause,
+              const ClauseTy *);
+  bool
+  applyClause(const tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy> &clause,
+              const ClauseTy *);
+  bool applyClause(const tomp::clause::SharedT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::DefaultT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool
+  applyClause(const tomp::clause::ThreadLimitT<TypeTy, IdTy, ExprTy> &clause,
+              const ClauseTy *);
+  bool applyClause(const tomp::clause::OrderT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::AllocateT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::ReductionT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::IfT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::LinearT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::NowaitT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+
+  uint32_t version;
+  llvm::omp::Directive construct;
+  HelperType &helper;
+  ListT<LeafReprInternal> leafs;
+  tomp::ListT<const ClauseTy *> nodes;
+  std::list<ClauseTy> implicit; // Container for materialized implicit clauses.
+                                // Inserting must preserve element addresses.
+  llvm::DenseMap<IdTy, ClauseSet> syms;
+  llvm::DenseSet<IdTy> mapBases;
+};
+
+// Deduction guide
+template <typename ClauseType, typename HelperType>
+ConstructDecompositionT(uint32_t, HelperType &, llvm::omp::Directive,
+                        llvm::ArrayRef<ClauseType>)
+    -> ConstructDecompositionT<ClauseType, HelperType>;
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(const ObjectTy &object,
+                                                       const ClauseTy *node) {
+  syms[object.id()].insert(node);
+}
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const tomp::ObjectListT<IdTy, ExprTy> &objects, const ClauseTy *node) {
+  for (auto &object : objects)
+    syms[object.id()].insert(node);
+}
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(const TypeTy &item,
+                                                       const ClauseTy *node) {
+  // Nothing to do for types.
+}
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(const ExprTy &item,
+                                                       const ClauseTy *node) {
+  // Nothing to do for expressions.
+}
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const tomp::clause::MapT<TypeTy, IdTy, ExprTy> &item,
+    const ClauseTy *node) {
+  auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(item.t);
+  addClauseSymsToMap(objects, node);
+  for (auto &object : objects) {
+    if (auto base = helper.getBaseObject(object))
+      mapBases.insert(base->id());
+  }
+}
+
+template <typename C, typename H>
+template <typename U>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const std::optional<U> &item, const ClauseTy *node) {
+  if (item)
+    addClauseSymsToMap(*item, node);
+}
+
+template <typename C, typename H>
+template <typename U>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const tomp::ListT<U> &item, const ClauseTy *node) {
+  for (auto &s : item)
+    addClauseSymsToMap(s, node);
+}
+
+template <typename C, typename H>
+template <typename... U, size_t... Is>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const std::tuple<U...> &item, const ClauseTy *node,
+    std::index_sequence<Is...>) {
+  (void)node; // Silence strange warning from GCC.
+  (addClauseSymsToMap(std::get<Is>(item), node), ...);
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<std::is_enum_v<llvm::remove_cvref_t<U>>, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  // Nothing to do for enums.
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::EmptyTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  // Nothing to do for an empty class.
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::IncompleteTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  // Nothing to do for an incomplete class (they're empty).
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::WrapperTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  addClauseSymsToMap(item.v, node);
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::TupleTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  constexpr size_t tuple_size =
+      std::tuple_size_v<llvm::remove_cvref_t<decltype(item.t)>>;
+  addClauseSymsToMap(item.t, node, std::make_index_sequence<tuple_size>{});
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::UnionTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  std::visit([&](auto &&s) { addClauseSymsToMap(s, node); }, item.u);
+}
+
+// Apply a clause to the only directive that allows it. If there are no
+// directives that allow it, or if there is more that one, do not apply
+// anything and return false, otherwise return true.
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyToUnique(const ClauseTy *node) {
+  auto unique = detail::find_unique(leafs, [=](const auto &dirInfo) {
+    return llvm::omp::isAllowedClauseForDirective(dirInfo.id, node->id,
+                                                  version);
+  });
+
+  if (unique != leafs.end()) {
+    unique->clauses.push_back(node);
+    return true;
+  }
+  return false;
+}
+
+// Apply a clause to the first directive in given range that allows it.
+// If such a directive does not exist, return false, otherwise return true.
+template <typename C, typename H>
+template <typename Iterator>
+bool ConstructDecompositionT<C, H>::applyToFirst(
+    const ClauseTy *node, llvm::iterator_range<Iterator> range) {
+  if (range.empty())
+    return false;
+
+  for (auto &dwc : range) {
+    if (!llvm::omp::isAllowedClauseForDirective(dwc.id, node->id, version))
+      continue;
+    dwc.clauses.push_back(node);
+    return true;
+  }
+  return false;
+}
+
+// Apply a clause to the innermost directive that allows it. If such a
+// directive does not exist, return false, otherwise return true.
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyToInnermost(const ClauseTy *node) {
+  return applyToFirst(node, llvm::reverse(leafs));
+}
+
+// Apply a clause to the outermost directive that allows it. If such a
+// directive does not exist, return false, otherwise return true.
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyToOutermost(const ClauseTy *node) {
+  return applyToFirst(node, llvm::iterator_range(leafs));
+}
+
+template <typename C, typename H>
+template <typename Predicate>
+bool ConstructDecompositionT<C, H>::applyIf(const ClauseTy *node,
+                                            Predicate shouldApply) {
+  bool applied = false;
+  for (auto &dwc : leafs) {
+    if (!llvm::omp::isAllowedClauseForDirective(dwc.id, node->id, version))
+      continue;
+    if (!shouldApply(dwc))
+      continue;
+    dwc.clauses.push_back(node);
+    applied = true;
+  }
+
+  return applied;
+}
+
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyToAll(const ClauseTy *node) {
+  return applyIf(node, [](auto) { return true; });
+}
+
+template <typename C, typename H>
+template <typename Clause>
+bool ConstructDecompositionT<C, H>::applyClause(Clause &&clause,
+                                                const ClauseTy *node) {
+  // The default behavior is to find the unique directive to which the
+  // given clause may be applied. If there are no such directives, or
+  // if there are multiple ones, flag an error.
+  // From "OpenMP Application Programming Interface", Version 5.2:
+  // S Some clauses are permitted only on a single leaf construct of the
+  // S combined or composite construct, in which case the effect is as if
+  // S the clause is applied to that specific construct. (p339, 31-33)
+  if (applyToUnique(node))
+    return true;
+
+  return false;
+}
+
+// COLLAPSE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::CollapseT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply COLLAPSE to the innermost directive. If it's not one that
+  // allows it flag an error.
+  if (!leafs.empty()) {
+    auto &last = leafs.back();
+
+    if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) {
+      last.clauses.push_back(node);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// PRIVATE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::PrivateT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  return applyToInnermost(node);
+}
+
+// FIRSTPRIVATE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  bool applied = false;
+
+  // S Section 17.2
+  // S The effect of the firstprivate clause is as if it is applied to one
+  // S or more leaf constructs as follows:
+
+  // S - To the distribute construct if it is among the constituent constructs;
+  // S - To the teams construct if it is among the constituent constructs and
+  // S   the distribute construct is not;
+  auto hasDistribute = findDirective(llvm::omp::OMPD_distribute);
+  auto hasTeams = findDirective(llvm::omp::OMPD_teams);
+  if (hasDistribute != nullptr) {
+    hasDistribute->clauses.push_back(node);
+    applied = true;
+    // S If the teams construct is among the constituent constructs and the
+    // S effect is not as if the firstprivate clause is applied to it by the
+    // S above rules, then the effect is as if the shared clause with the
+    // S same list item is applied to the teams construct.
+    if (hasTeams != nullptr) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/clause.v});
+      hasTeams->clauses.push_back(shared);
+    }
+  } else if (hasTeams != nullptr) {
+    hasTeams->clauses.push_back(node);
+    applied = true;
+  }
+
+  // S - To a worksharing construct that accepts the clause if one is among
+  // S   the constituent constructs;
+  auto findWorksharing = [&]() {
+    auto worksharing = getWorksharing();
+    for (auto &dwc : leafs) {
+      auto found = llvm::find(worksharing, dwc.id);
+      if (found != std::end(worksharing))
+        return &dwc;
+    }
+    return static_cast<typename decltype(leafs)::value_type *>(nullptr);
+  };
+
+  auto hasWorksharing = findWorksharing();
+  if (hasWorksharing != nullptr) {
+    hasWorksharing->clauses.push_back(node);
+    applied = true;
+  }
+
+  // S - To the taskloop construct if it is among the constituent constructs;
+  auto hasTaskloop = findDirective(llvm::omp::OMPD_taskloop);
+  if (hasTaskloop != nullptr) {
+    hasTaskloop->clauses.push_back(node);
+    applied = true;
+  }
+
+  // S - To the parallel construct if it is among the constituent constructs
+  // S   and neither a taskloop construct nor a worksharing construct that
+  // S   accepts the clause is among them;
+  auto hasParallel = findDirective(llvm::omp::OMPD_parallel);
+  if (hasParallel != nullptr) {
+    if (hasTaskloop == nullptr && hasWorksharing == nullptr) {
+      hasParallel->clauses.push_back(node);
+      applied = true;
+    } else {
+      // S If the parallel construct is among the constituent constructs and
+      // S the effect is not as if the firstprivate clause is applied to it by
+      // S the above rules, then the effect is as if the shared clause with
+      // S the same list item is applied to the parallel construct.
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/clause.v});
+      hasParallel->clauses.push_back(shared);
+    }
+  }
+
+  // S - To the target construct if it is among the constituent constructs
+  // S   and the same list item neither appears in a lastprivate clause nor
+  // S   is the base variable or base pointer of a list item that appears in
+  // S   a map clause.
+  auto inLastprivate = [&](const ObjectTy &object) {
+    if (ClauseSet *set = findClausesWith(object)) {
+      return llvm::find_if(*set, [](const ClauseTy *c) {
+               return c->id == llvm::omp::Clause::OMPC_lastprivate;
+             }) != set->end();
+    }
+    return false;
+  };
+
+  auto hasTarget = findDirective(llvm::omp::OMPD_target);
+  if (hasTarget != nullptr) {
+    tomp::ObjectListT<IdTy, ExprTy> objects;
+    llvm::copy_if(
+        clause.v, std::back_inserter(objects), [&](const ObjectTy &object) {
+          return !inLastprivate(object) && !mapBases.contains(object.id());
+        });
+    if (!objects.empty()) {
+      auto *firstp = makeClause(
+          llvm::omp::Clause::OMPC_firstprivate,
+          tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy>{/*List=*/objects});
+      hasTarget->clauses.push_back(firstp);
+      applied = true;
+    }
+  }
+
+  return applied;
+}
+
+// LASTPRIVATE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  bool applied = false;
+
+  // S The effect of the lastprivate clause is as if it is applied to all leaf
+  // S constructs that permit the clause.
+  applied = applyToAll(node);
+  if (!applied)
+    return false;
+
+  auto inFirstprivate = [&](const ObjectTy &object) {
+    if (ClauseSet *set = findClausesWith(object)) {
+      return llvm::find_if(*set, [](const ClauseTy *c) {
+               return c->id == llvm::omp::Clause::OMPC_firstprivate;
+             }) != set->end();
+    }
+    return false;
+  };
+
+  auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(clause.t);
+
+  // Prepare list of objects that could end up in a SHARED clause.
+  tomp::ObjectListT<IdTy, ExprTy> sharedObjects;
+  llvm::copy_if(
+      objects, std::back_inserter(sharedObjects),
+      [&](const ObjectTy &object) { return !inFirstprivate(object); });
+
+  if (!sharedObjects.empty()) {
+    // S If the parallel construct is among the constituent constructs and the
+    // S list item is not also specified in the firstprivate clause, then the
+    // S effect of the lastprivate clause is as if the shared clause with the
+    // S same list item is applied to the parallel construct.
+    if (auto hasParallel = findDirective(llvm::omp::OMPD_parallel)) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
+      hasParallel->clauses.push_back(shared);
+      applied = true;
+    }
+
+    // S If the teams construct is among the constituent constructs and the
+    // S list item is not also specified in the firstprivate clause, then the
+    // S effect of the lastprivate clause is as if the shared clause with the
+    // S same list item is applied to the teams construct.
+    if (auto hasTeams = findDirective(llvm::omp::OMPD_teams)) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
+      hasTeams->clauses.push_back(shared);
+      applied = true;
+    }
+  }
+
+  // S If the target construct is among the constituent constructs and the
+  // S list item is not the base variable or base pointer of a list item that
+  // S appears in a map clause, the effect of the lastprivate clause is as if
+  // S the same list item appears in a map clause with a map-type of tofrom.
+  if (auto hasTarget = findDirective(llvm::omp::OMPD_target)) {
+    tomp::ObjectListT<IdTy, ExprTy> tofrom;
+    llvm::copy_if(objects, std::back_inserter(tofrom),
+                  [&](const ObjectTy &object) {
+                    return !mapBases.contains(object.id());
+                  });
+
+    if (!tofrom.empty()) {
+      using MapType =
+          typename tomp::clause::MapT<TypeTy, IdTy, ExprTy>::MapType;
+      auto *map =
+          makeClause(llvm::omp::Clause::OMPC_map,
+                     tomp::clause::MapT<TypeTy, IdTy, ExprTy>{
+                         {/*MapType=*/MapType::Tofrom,
+                          /*MapTypeModifier=*/std::nullopt,
+                          /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt,
+                          /*LocatorList=*/std::move(tofrom)}});
+      hasTarget->clauses.push_back(map);
+      applied = true;
+    }
+  }
+
+  return applied;
+}
+
+// SHARED
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::SharedT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply SHARED to the all leafs that allow it.
+  return applyToAll(node);
+}
+
+// DEFAULT
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::DefaultT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply DEFAULT to the all leafs that allow it.
+  return applyToAll(node);
+}
+
+// THREAD_LIMIT
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::ThreadLimitT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply THREAD_LIMIT to the all leafs that allow it.
+  return applyToAll(node);
+}
+
+// ORDER
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::OrderT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply ORDER to the all leafs that allow it.
+  return applyToAll(node);
+}
+
+// ALLOCATE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::AllocateT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // This one needs to be applied at the end, once we know which clauses are
+  // assigned to which leaf constructs.
+
+  // S The effect of the allocate clause is as if it is applied to all leaf
+  // S constructs that permit the clause and to which a data-sharing attribute
+  // S clause that may create a private copy of the same list item is applied.
+
+  auto canMakePrivateCopy = [](llvm::omp::Clause id) {
+    switch (id) {
+    case llvm::omp::Clause::OMPC_firstprivate:
+    case llvm::omp::Clause::OMPC_lastprivate:
+    case llvm::omp::Clause::OMPC_private:
+      return true;
+    default:
+      return false;
+    }
+  };
+
+  bool applied = applyIf(node, [&](const auto &dwc) {
+    return llvm::any_of(dwc.clauses, [&](const ClauseTy *n) {
+      return canMakePrivateCopy(n->id);
+    });
+  });
+
+  return applied;
+}
+
+// REDUCTION
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::ReductionT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // S The effect of the reduction clause is as if it is applied to all leaf
+  // S constructs that permit the clause, except for the following constructs:
+  // S - The parallel construct, when combined with the sections, worksharing-
+  // S   loop, loop, or taskloop construct; and
+  // S - The teams construct, when combined with the loop construct.
+  bool applyToParallel = true, applyToTeams = true;
+
+  auto hasParallel = findDirective(llvm::omp::Directive::OMPD_parallel);
+  if (hasParallel) {
+    auto exclusions = llvm::concat<const llvm::omp::Directive>(
+        getWorksharingLoop(), llvm::ArrayRef{
+                                  llvm::omp::Directive::OMPD_loop,
+                                  llvm::omp::Directive::OMPD_sections,
+                                  llvm::omp::Directive::OMPD_taskloop,
+                              });
+    auto present = [&](llvm::omp::Directive id) {
+      return findDirective(id) != nullptr;
+    };
+
+    if (llvm::any_of(exclusions, present))
+      applyToParallel = false;
+  }
+
+  auto hasTeams = findDirective(llvm::omp::Directive::OMPD_teams);
+  if (hasTeams) {
+    // The only exclusion is OMPD_loop.
+    if (findDirective(llvm::omp::Directive::OMPD_loop))
+      applyToTeams = false;
+  }
+
+  auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(clause.t);
+
+  tomp::ObjectListT<IdTy, ExprTy> sharedObjects;
+  llvm::transform(objects, std::back_inserter(sharedObjects),
+                  [&](const ObjectTy &object) {
+                    auto maybeBase = helper.getBaseObject(object);
+                    return maybeBase ? *maybeBase : object;
+                  });
+
+  // S For the parallel and teams constructs above, the effect of the
+  // S reduction clause instead is as if each list item or, for any list
+  // S item that is an array item, its corresponding base array or base
+  // S pointer appears in a shared clause for the construct.
+  if (!sharedObjects.empty()) {
+    if (hasParallel && !applyToParallel) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
+      hasParallel->clauses.push_back(shared);
+    }
+    if (hasTeams && !applyToTeams) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
+      hasTeams->clauses.push_back(shared);
+    }
+  }
+
+  // TODO(not implemented in parser yet): Apply the following.
+  // S If the task reduction-modifier is specified, the effect is as if
+  // S it only modifies the behavior of the reduction clause on the innermost
+  // S leaf construct that accepts the modifier (see Section 5.5.8). If the
+  // S inscan reduction-modifier is specified, the effect is as if it modifies
+  // S the behavior of the reduction clause on all constructs of the combined
+  // S construct to which the clause is applied and that accept the modifier.
+
+  bool applied = applyIf(node, [&](auto &dwc) {
+    if (!applyToParallel && &dwc == hasParallel)
+      return false;
+    if (!applyToTeams && &dwc == hasTeams)
+      return false;
+    return true;
+  });
+
+  // S If a list item in a reduction clause on a combined target construct
+  // S does not have the same base variable or base pointer as a list item
+  // S in a map clause on the construct, then the effect is as if the list
+  // S item in the reduction clause appears as a list item in a map clause
+  // S with a map-type of tofrom.
+  auto hasTarget = findDirective(llvm::omp::Directive::OMPD_target);
+  if (hasTarget && leafs.size() > 1) {
+    tomp::ObjectListT<IdTy, ExprTy> tofrom;
+    llvm::copy_if(objects, std::back_inserter(tofrom),
+                  [&](const ObjectTy &object) {
+                    if (auto maybeBase = helper.getBaseObject(object))
+                      return !mapBases.contains(maybeBase->id());
+                    return !mapBases.contains(object.id()); // XXX is this ok?
+                  });
+    if (!tofrom.empty()) {
+      using MapType =
+          typename tomp::clause::MapT<TypeTy, IdTy, ExprTy>::MapType;
+      auto *map = makeClause(
+          llvm::omp::Clause::OMPC_map,
+          tomp::clause::MapT<TypeTy, IdTy, ExprTy>{
+              {/*MapType=*/MapType::Tofrom, /*MapTypeModifier=*/std::nullopt,
+               /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt,
+               /*LocatorList=*/std::move(tofrom)}});
+
+      hasTarget->clauses.push_back(map);
+      applied = true;
+    }
+  }
+
+  return applied;
+}
+
+// IF
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::IfT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  using DirectiveNameModifier =
+      typename clause::IfT<TypeTy, IdTy, ExprTy>::DirectiveNameModifier;
+  auto &modifier = std::get<std::optional<DirectiveNameModifier>>(clause.t);
+
+  if (modifier) {
+    llvm::omp::Directive dirId = *modifier;
+
+    if (auto *hasDir = findDirective(dirId)) {
+      hasDir->clauses.push_back(node);
+      return true;
+    }
+    return false;
+  }
+
+  return applyToAll(node);
+}
+
+// LINEAR
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::LinearT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // S The effect of the linear clause is as if it is applied to the innermost
+  // S leaf construct.
+  if (!applyToInnermost(node))
+    return false;
+
+  // The rest is about SIMD.
+  if (!findDirective(llvm::omp::OMPD_simd))
+    return true;
+
+  // S Additionally, if the list item is not the iteration variable of a
+  // S simd or worksharing-loop SIMD construct, the effect on the outer leaf
+  // S constructs is as if the list item was specified in firstprivate and
+  // S lastprivate clauses on the combined or composite construct, [...]
+  //
+  // S If a list item of the linear clause is the iteration variable of a
+  // S simd or worksharing-loop SIMD construct and it is not declared in
+  // S the construct, the effect on the outer leaf constructs is as if the
+  // S list item was specified in a lastprivate clause on the combined or
+  // S composite construct [...]
+
+  // It's not clear how an object can be listed in a clause AND be the
+  // iteration variable of a construct in which is it declared. If an
+  // object is declared in the construct, then the declaration is located
+  // after the clause listing it.
+
+  std::optional<ObjectTy> iterVar = helper.getLoopIterVar();
+  const auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(clause.t);
+
+  // Lists of objects that will be used to construct FIRSTPRIVATE and
+  // LASTPRIVATE clauses.
+  tomp::ObjectListT<IdTy, ExprTy> first, last;
+
+  for (const ObjectTy &object : objects) {
+    last.push_back(object);
+    if (iterVar && object.id() != iterVar->id())
+      first.push_back(object);
+  }
+
+  if (!first.empty()) {
+    auto *firstp = makeClause(
+        llvm::omp::Clause::OMPC_firstprivate,
+        tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy>{/*List=*/first});
+    nodes.push_back(firstp); // Appending to the main clause list.
+  }
+  if (!last.empty()) {
+    auto *lastp =
+        makeClause(llvm::omp::Clause::OMPC_lastprivate,
+                   tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy>{
+                       {/*LastprivateModifier=*/std::nullopt, /*List=*/last}});
+    nodes.push_back(lastp); // Appending to the main clause list.
+  }
+  return true;
+}
+
+// NOWAIT
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::NowaitT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  return applyToOutermost(node);
+}
+
+template <typename C, typename H> bool ConstructDecompositionT<C, H>::split() {
+  bool success = true;
+
+  for (llvm::omp::Directive leaf :
+       llvm::omp::getLeafConstructsOrSelf(construct))
+    leafs.push_back(LeafReprInternal{leaf, /*clauses=*/{}});
+
+  for (const ClauseTy *node : nodes)
+    addClauseSymsToMap(*node, node);
+
+  // First we need to apply LINEAR, because it can generate additional
+  // FIRSTPRIVATE and LASTPRIVATE clauses that apply to the combined/
+  // composite construct.
+  // Collect them separately, because they may modify the clause list.
+  llvm::SmallVector<const ClauseTy *> linears;
+  for (const ClauseTy *node : nodes) {
+    if (node->id == llvm::omp::Clause::OMPC_linear)
+      linears.push_back(node);
+  }
+  for (const auto *node : linears) {
+    success = success &&
+              applyClause(std::get<tomp::clause::LinearT<TypeTy, IdTy, ExprTy>>(
+                              node->u),
+                          node);
+  }
+
+  // ALLOCATE clauses need to be applied last since they need to see
+  // which directives have data-privatizing clauses.
+  auto skip = [](const ClauseTy *node) {
+    switch (node->id) {
+    case llvm::omp::Clause::OMPC_allocate:
+    case llvm::omp::Clause::OMPC_linear:
+      return true;
+    default:
+      return false;
+    }
+  };
+
+  // Apply (almost) all clauses.
+  for (const ClauseTy *node : nodes) {
+    if (skip(node))
+      continue;
+    success =
+        success &&
+        std::visit([&](auto &&s) { return applyClause(s, node); }, node->u);
+  }
+
+  // Apply ALLOCATE.
+  for (const ClauseTy *node : nodes) {
+    if (node->id != llvm::omp::Clause::OMPC_allocate)
+      continue;
+    success =
+        success &&
+        std::visit([&](auto &&s) { return applyClause(s, node); }, node->u);
+  }
+
+  return success;
+}
+
+} // namespace tomp
+
+#endif // LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H

>From 60201bbf05fd1cbd63c0073cf399dcb7cab1ff99 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Thu, 25 Apr 2024 13:09:46 -0500
Subject: [PATCH 5/8] Add LLVM license header, fix include guard

---
 .../Frontend/OpenMP/ConstructDecompositionT.h | 23 ++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
index f8a18a1bed7ae6..f2a3c0d327fd46 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -1,5 +1,22 @@
-#ifndef LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
-#define LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
+//===- ConstructDecompositionT.h -- Decomposing compound constructs -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Given a compound construct with a set of clauses, generate the list of
+// constituent leaf constructs, each with a list of clauses that apply to it.
+//
+// Note: Clauses that are not originally present, but that are implied by the
+// OpenMP spec are materialized, and are present in the output.
+//
+// Note: Composite constructs will also be broken up into leaf constructs.
+// If composite constructs require processing as a whole, the lists of clauses
+// for each leaf constituent should be concatenated.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_FRONTEND_OPENMP_CONSTRUCTDECOMPOSITIONT_H
+#define LLVM_FRONTEND_OPENMP_CONSTRUCTDECOMPOSITIONT_H
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -982,4 +999,4 @@ template <typename C, typename H> bool ConstructDecompositionT<C, H>::split() {
 
 } // namespace tomp
 
-#endif // LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
+#endif // LLVM_FRONTEND_OPENMP_CONSTRUCTDECOMPOSITIONT_H

>From d5635d667fd2e32eb600a810252b031ad7064fce Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Thu, 25 Apr 2024 14:08:12 -0500
Subject: [PATCH 6/8] Split the directive decomposer into its own file

---
 flang/lib/Lower/CMakeLists.txt        |   1 +
 flang/lib/Lower/OpenMP/Decomposer.cpp | 107 ++++++++++++++++++++++
 flang/lib/Lower/OpenMP/Decomposer.h   |  44 +++++++++
 flang/lib/Lower/OpenMP/OpenMP.cpp     | 126 ++++++--------------------
 4 files changed, 179 insertions(+), 99 deletions(-)
 create mode 100644 flang/lib/Lower/OpenMP/Decomposer.cpp
 create mode 100644 flang/lib/Lower/OpenMP/Decomposer.h

diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt
index f92d1a2bc7de18..1546409752e781 100644
--- a/flang/lib/Lower/CMakeLists.txt
+++ b/flang/lib/Lower/CMakeLists.txt
@@ -27,6 +27,7 @@ add_flang_library(FortranLower
   OpenMP/ClauseProcessor.cpp
   OpenMP/Clauses.cpp
   OpenMP/DataSharingProcessor.cpp
+  OpenMP/Decomposer.cpp
   OpenMP/OpenMP.cpp
   OpenMP/ReductionProcessor.cpp
   OpenMP/Utils.cpp
diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp
new file mode 100644
index 00000000000000..5bc19797e08a1b
--- /dev/null
+++ b/flang/lib/Lower/OpenMP/Decomposer.cpp
@@ -0,0 +1,107 @@
+//===-- Decomposer.cpp -- Compound directive decomposition ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "Decomposer.h"
+
+#include "Clauses.h"
+#include "Utils.h"
+#include "flang/Lower/PFTBuilder.h"
+#include "flang/Semantics/semantics.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Frontend/OpenMP/ClauseT.h"
+#include "llvm/Frontend/OpenMP/ConstructDecompositionT.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <optional>
+#include <utility>
+#include <variant>
+
+using namespace Fortran;
+
+namespace {
+using namespace Fortran::lower::omp;
+
+struct ConstructDecomposition {
+  ConstructDecomposition(mlir::ModuleOp modOp,
+                         semantics::SemanticsContext &semaCtx,
+                         lower::pft::Evaluation &ev,
+                         llvm::omp::Directive compound,
+                         const List<Clause> &clauses)
+      : semaCtx(semaCtx), mod(modOp), eval(ev) {
+    tomp::ConstructDecompositionT decompose(getOpenMPVersion(modOp), *this,
+                                            compound, llvm::ArrayRef(clauses));
+    output = std::move(decompose.output);
+  }
+
+  // Given an object, return its base object if one exists.
+  std::optional<Object> getBaseObject(const Object &object) {
+    return lower::omp::getBaseObject(object, semaCtx);
+  }
+
+  // Return the iteration variable of the associated loop if any.
+  std::optional<Object> getLoopIterVar() {
+    if (semantics::Symbol *symbol = getIterationVariableSymbol(eval))
+      return Object{symbol, /*designator=*/{}};
+    return std::nullopt;
+  }
+
+  semantics::SemanticsContext &semaCtx;
+  mlir::ModuleOp mod;
+  lower::pft::Evaluation &eval;
+  List<UnitConstruct> output;
+};
+} // namespace
+
+namespace Fortran::lower::omp {
+LLVM_DUMP_METHOD llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                               const UnitConstruct &uc) {
+  os << llvm::omp::getOpenMPDirectiveName(uc.id);
+  for (auto [index, clause] : llvm::enumerate(uc.clauses)) {
+    os << (index == 0 ? '\t' : ' ');
+    os << llvm::omp::getOpenMPClauseName(clause.id);
+  }
+  return os;
+}
+
+ConstructQueue buildConstructQueue(
+    mlir::ModuleOp modOp, Fortran::semantics::SemanticsContext &semaCtx,
+    Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive compound,
+    const List<Clause> &clauses) {
+
+  List<UnitConstruct> constructs;
+
+  ConstructDecomposition decompose(modOp, semaCtx, eval, compound, clauses);
+  assert(!decompose.output.empty());
+
+  llvm::SmallVector<llvm::omp::Directive> loweringUnits;
+  std::ignore =
+      llvm::omp::getLeafOrCompositeConstructs(compound, loweringUnits);
+
+  int leafIndex = 0;
+  for (llvm::omp::Directive dir_id : loweringUnits) {
+    constructs.push_back(UnitConstruct{dir_id});
+    UnitConstruct &uc = constructs.back();
+    llvm::ArrayRef<llvm::omp::Directive> leafsOrSelf =
+        llvm::omp::getLeafConstructsOrSelf(dir_id);
+    for (int i = 0, e = leafsOrSelf.size(); i != e; ++i) {
+      uc.clauses.append(decompose.output[leafIndex].clauses);
+      ++leafIndex;
+    }
+  }
+
+  return constructs;
+}
+} // namespace Fortran::lower::omp
diff --git a/flang/lib/Lower/OpenMP/Decomposer.h b/flang/lib/Lower/OpenMP/Decomposer.h
new file mode 100644
index 00000000000000..e2a9e29918fbb8
--- /dev/null
+++ b/flang/lib/Lower/OpenMP/Decomposer.h
@@ -0,0 +1,44 @@
+//===-- Decomposer.h -- Compound directive decomposition ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef FORTRAN_LOWER_OPENMP_DECOMPOSER_H
+#define FORTRAN_LOWER_OPENMP_DECOMPOSER_H
+
+#include "Clauses.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "llvm/Frontend/OpenMP/ConstructDecompositionT.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class raw_ostream;
+}
+
+namespace Fortran {
+namespace semantics {
+class SemanticsContext;
+}
+namespace lower::pft {
+struct Evaluation;
+}
+} // namespace Fortran
+
+namespace Fortran::lower::omp {
+using UnitConstruct = tomp::DirectiveWithClauses<lower::omp::Clause>;
+using ConstructQueue = List<UnitConstruct>;
+
+LLVM_DUMP_METHOD llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                               const UnitConstruct &uc);
+
+ConstructQueue
+buildConstructQueue(mlir::ModuleOp modOp,
+                    Fortran::semantics::SemanticsContext &semaCtx,
+                    Fortran::lower::pft::Evaluation &eval,
+                    llvm::omp::Directive compound, const List<Clause> &clauses);
+} // namespace Fortran::lower::omp
+
+#endif // FORTRAN_LOWER_OPENMP_DECOMPOSER_H
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 4b8afd42f639d5..7e0105e749c39e 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -15,6 +15,7 @@
 #include "ClauseProcessor.h"
 #include "Clauses.h"
 #include "DataSharingProcessor.h"
+#include "Decomposer.h"
 #include "DirectivesCommon.h"
 #include "ReductionProcessor.h"
 #include "Utils.h"
@@ -36,7 +37,6 @@
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Frontend/OpenMP/ConstructDecompositionT.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 
 using namespace Fortran::lower::omp;
@@ -45,6 +45,13 @@ using namespace Fortran::lower::omp;
 // Code generation helper functions
 //===----------------------------------------------------------------------===//
 
+static void genOMPDispatch(Fortran::lower::AbstractConverter &converter,
+                           Fortran::lower::SymMap &symTable,
+                           Fortran::semantics::SemanticsContext &semaCtx,
+                           Fortran::lower::pft::Evaluation &eval,
+                           mlir::Location loc, const ConstructQueue &queue,
+                           ConstructQueue::iterator item);
+
 static Fortran::lower::pft::Evaluation *
 getCollapsedLoopEval(Fortran::lower::pft::Evaluation &eval, int collapseValue) {
   // Return the Evaluation of the innermost collapsed loop, or the current one
@@ -73,89 +80,6 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
     converter.genEval(e);
 }
 
-//===----------------------------------------------------------------------===//
-// Directive decomposition
-//===----------------------------------------------------------------------===//
-
-namespace {
-using DirectiveWithClauses = tomp::DirectiveWithClauses<lower::omp::Clause>;
-using ConstructQueue = List<DirectiveWithClauses>;
-} // namespace
-
-static void genOMPDispatch(Fortran::lower::AbstractConverter &converter,
-                           Fortran::lower::SymMap &symTable,
-                           Fortran::semantics::SemanticsContext &semaCtx,
-                           Fortran::lower::pft::Evaluation &eval,
-                           mlir::Location loc, const ConstructQueue &queue,
-                           ConstructQueue::iterator item);
-
-namespace {
-struct ConstructDecomposition {
-  ConstructDecomposition(mlir::ModuleOp modOp,
-                         semantics::SemanticsContext &semaCtx,
-                         lower::pft::Evaluation &ev,
-                         llvm::omp::Directive construct,
-                         const List<Clause> &clauses)
-      : semaCtx(semaCtx), mod(modOp), eval(ev) {
-    tomp::ConstructDecompositionT decompose(getOpenMPVersion(modOp), *this,
-                                            construct, llvm::ArrayRef(clauses));
-    output = std::move(decompose.output);
-  }
-
-  // Given an object, return its base object if one exists.
-  std::optional<Object> getBaseObject(const Object &object) {
-    return lower::omp::getBaseObject(object, semaCtx);
-  }
-
-  // Return the iteration variable of the associated loop if any.
-  std::optional<Object> getLoopIterVar() {
-    if (semantics::Symbol *symbol = getIterationVariableSymbol(eval))
-      return Object{symbol, /*designator=*/{}};
-    return std::nullopt;
-  }
-
-  semantics::SemanticsContext &semaCtx;
-  mlir::ModuleOp mod;
-  lower::pft::Evaluation &eval;
-  List<DirectiveWithClauses> output;
-};
-} // namespace
-
-LLVM_DUMP_METHOD static llvm::raw_ostream &
-operator<<(llvm::raw_ostream &os, const DirectiveWithClauses &dwc) {
-  os << llvm::omp::getOpenMPDirectiveName(dwc.id);
-  for (auto [index, clause] : llvm::enumerate(dwc.clauses)) {
-    os << (index == 0 ? '\t' : ' ');
-    os << llvm::omp::getOpenMPClauseName(clause.id);
-  }
-  return os;
-}
-
-static void splitCompoundConstruct(
-    mlir::ModuleOp modOp, Fortran::semantics::SemanticsContext &semaCtx,
-    Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive construct,
-    const List<Clause> &clauses, List<DirectiveWithClauses> &directives) {
-
-  ConstructDecomposition decompose(modOp, semaCtx, eval, construct, clauses);
-  assert(!decompose.output.empty());
-
-  llvm::SmallVector<llvm::omp::Directive> loweringUnits;
-  std::ignore =
-      llvm::omp::getLeafOrCompositeConstructs(construct, loweringUnits);
-
-  int leafIndex = 0;
-  for (llvm::omp::Directive dir_id : loweringUnits) {
-    directives.push_back(DirectiveWithClauses{dir_id});
-    DirectiveWithClauses &dwc = directives.back();
-    llvm::ArrayRef<llvm::omp::Directive> leafsOrSelf =
-        llvm::omp::getLeafConstructsOrSelf(dir_id);
-    for (int i = 0, e = leafsOrSelf.size(); i != e; ++i) {
-      dwc.clauses.append(decompose.output[leafIndex].clauses);
-      ++leafIndex;
-    }
-  }
-}
-
 static fir::GlobalOp globalInitialization(
     Fortran::lower::AbstractConverter &converter,
     fir::FirOpBuilder &firOpBuilder, const Fortran::semantics::Symbol &sym,
@@ -2170,7 +2094,9 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
       semaCtx);
   mlir::Location currentLocation = converter.genLocation(directive.source);
 
-  ConstructQueue queue{{DirectiveWithClauses{directive.v, clauses}}};
+  ConstructQueue queue{
+      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
+                          eval, directive.v, clauses)};
 
   switch (directive.v) {
   default:
@@ -2234,7 +2160,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   mlir::Location currentLocation = converter.genLocation(verbatim.source);
 
   ConstructQueue queue{
-      DirectiveWithClauses{llvm::omp::Directive::OMPD_flush, clauses}};
+      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
+                          eval, llvm::omp::Directive::OMPD_flush, clauses)};
   genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects,
              clauses, queue, queue.begin());
 }
@@ -2381,9 +2308,9 @@ genOMP(Fortran::lower::AbstractConverter &converter,
 
   llvm::omp::Directive directive =
       std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v;
-  ConstructQueue queue;
-  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
-                         directive, clauses, queue);
+  ConstructQueue queue{
+      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
+                          eval, directive, clauses)};
   genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                  queue.begin());
 }
@@ -2399,9 +2326,9 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   List<Clause> clauses =
       makeClauses(std::get<Fortran::parser::OmpClauseList>(cd.t), semaCtx);
 
-  ConstructQueue queue;
-  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
-                         llvm::omp::Directive::OMPD_critical, clauses, queue);
+  ConstructQueue queue{
+      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
+                          eval, llvm::omp::Directive::OMPD_critical, clauses)};
 
   const auto &name = std::get<std::optional<Fortran::parser::Name>>(cd.t);
   mlir::Location currentLocation = converter.getCurrentLocation();
@@ -2440,9 +2367,9 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
 
   llvm::omp::Directive directive =
       std::get<parser::OmpLoopDirective>(beginLoopDirective.t).v;
-  ConstructQueue queue;
-  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
-                         directive, clauses, queue);
+  ConstructQueue queue{
+      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
+                          eval, directive, clauses)};
   genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                  queue.begin());
 }
@@ -2455,7 +2382,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
        const Fortran::parser::OpenMPSectionConstruct &sectionConstruct) {
   mlir::Location loc = converter.getCurrentLocation();
   ConstructQueue queue{
-      DirectiveWithClauses{llvm::omp::Directive::OMPD_section}};
+      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
+                          eval, llvm::omp::Directive::OMPD_section, {})};
   genSectionOp(converter, symTable, semaCtx, eval, loc,
                /*clauses=*/{}, queue, queue.begin());
 }
@@ -2480,9 +2408,9 @@ genOMP(Fortran::lower::AbstractConverter &converter,
 
   llvm::omp::Directive directive =
       std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).v;
-  ConstructQueue queue;
-  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
-                         directive, clauses, queue);
+  ConstructQueue queue{
+      buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
+                          eval, directive, clauses)};
   genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
                  queue.begin());
 }

>From 38ea99f79d7245978b08dae082f0c1999219fe78 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 30 Apr 2024 07:47:57 -0500
Subject: [PATCH 7/8] Expand tuple member into class with TupleTrait

---
 llvm/include/llvm/Frontend/OpenMP/ClauseT.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
index daef02bcfc9a3f..9357e3fb8e5524 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
@@ -1142,9 +1142,11 @@ struct UsesAllocatorsT {
   using MemSpace = E;
   using TraitsArray = ObjectT<I, E>;
   using Allocator = E;
-  using AllocatorSpec =
-      std::tuple<OPT(MemSpace), OPT(TraitsArray), Allocator>; // Not a spec name
-  using Allocators = ListT<AllocatorSpec>;                    // Not a spec name
+  struct AllocatorSpec { // Not a spec name
+    using TupleTrait = std::true_type;
+    std::tuple<OPT(MemSpace), OPT(TraitsArray), Allocator> t;
+  };
+  using Allocators = ListT<AllocatorSpec>; // Not a spec name
   using WrapperTrait = std::true_type;
   Allocators v;
 };

>From 579967ee26d08d4afaf00dd0e8ff38e5cb09a9b1 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 30 Apr 2024 13:02:02 -0500
Subject: [PATCH 8/8] Switch to getOpenMPVersionAttribute, remove old functions

---
 flang/lib/Lower/OpenMP/Decomposer.cpp | 6 ++++--
 flang/lib/Lower/OpenMP/Utils.cpp      | 6 ------
 flang/lib/Lower/OpenMP/Utils.h        | 1 -
 3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp
index 5bc19797e08a1b..2d14753f83d4e9 100644
--- a/flang/lib/Lower/OpenMP/Decomposer.cpp
+++ b/flang/lib/Lower/OpenMP/Decomposer.cpp
@@ -16,6 +16,7 @@
 #include "Utils.h"
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Semantics/semantics.h"
+#include "flang/Tools/CrossToolHelpers.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
@@ -41,8 +42,9 @@ struct ConstructDecomposition {
                          llvm::omp::Directive compound,
                          const List<Clause> &clauses)
       : semaCtx(semaCtx), mod(modOp), eval(ev) {
-    tomp::ConstructDecompositionT decompose(getOpenMPVersion(modOp), *this,
-                                            compound, llvm::ArrayRef(clauses));
+    tomp::ConstructDecompositionT decompose(getOpenMPVersionAttribute(modOp),
+                                            *this, compound,
+                                            llvm::ArrayRef(clauses));
     output = std::move(decompose.output);
   }
 
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index c38e0c18cac88c..0e7645859a4f52 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -48,12 +48,6 @@ int64_t getCollapseValue(const List<Clause> &clauses) {
   return 1;
 }
 
-uint32_t getOpenMPVersion(mlir::ModuleOp mod) {
-  if (mlir::Attribute verAttr = mod->getAttr("omp.version"))
-    return llvm::cast<mlir::omp::VersionAttr>(verAttr).getVersion();
-  llvm_unreachable("Expecting OpenMP version attribute in module");
-}
-
 void genObjectList(const ObjectList &objects,
                    Fortran::lower::AbstractConverter &converter,
                    llvm::SmallVectorImpl<mlir::Value> &operands) {
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 5e0ebba23bf358..82c1fb3c8209e5 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -65,7 +65,6 @@ void gatherFuncAndVarSyms(
     llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause);
 
 int64_t getCollapseValue(const List<Clause> &clauses);
-uint32_t getOpenMPVersion(mlir::ModuleOp mod);
 
 Fortran::semantics::Symbol *
 getOmpObjectSymbol(const Fortran::parser::OmpObject &ompObject);



More information about the llvm-commits mailing list