[llvm-branch-commits] [flang] [llvm] [flang][OpenMP] Decompose compound constructs, do recursive lowering (PR #90098)
    Krzysztof Parzyszek via llvm-branch-commits 
    llvm-branch-commits at lists.llvm.org
       
    Thu Apr 25 11:10:20 PDT 2024
    
    
  
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/90098
>From 6f7697e46ace92707bc4cf648fab25a72c0639a1 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Thu, 11 Apr 2024 10:35:02 -0500
Subject: [PATCH 1/2] [flang][OpenMP] Decompose compound constructs, do
 recursive lowering
A compound construct with a list of clauses is broken up into
individual leaf/composite constructs. Each such construct has
the list of clauses that apply to it based on the OpenMP spec.
Each lowering function (i.e. a function that generates MLIR ops)
is now responsible for generating its body as described below.
Functions that receive AST nodes extract the construct and the
clauses from the node. They then create a work queue consisting
of individual constructs, and invoke a common dispatch function.
The dispatch function examines the current position in the queue,
and invokes the appropriate lowering function. Each lowering
function receives the queue as well, and once it needs to generate
its body, it either invokes the dispatch function on the rest of
the queue (if any), or processes nested evaluations if the work
queue is at the end.
---
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 784 +++++++-------
 .../Frontend/OpenMP/ConstructDecompositionT.h | 985 ++++++++++++++++++
 2 files changed, 1376 insertions(+), 393 deletions(-)
 create mode 100644 llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 47935e6cf8efcf..4b8afd42f639d5 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -36,6 +36,7 @@
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Frontend/OpenMP/ConstructDecompositionT.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 
 using namespace Fortran::lower::omp;
@@ -72,6 +73,89 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
     converter.genEval(e);
 }
 
+//===----------------------------------------------------------------------===//
+// Directive decomposition
+//===----------------------------------------------------------------------===//
+
+namespace {
+using DirectiveWithClauses = tomp::DirectiveWithClauses<lower::omp::Clause>;
+using ConstructQueue = List<DirectiveWithClauses>;
+} // namespace
+
+static void genOMPDispatch(Fortran::lower::AbstractConverter &converter,
+                           Fortran::lower::SymMap &symTable,
+                           Fortran::semantics::SemanticsContext &semaCtx,
+                           Fortran::lower::pft::Evaluation &eval,
+                           mlir::Location loc, const ConstructQueue &queue,
+                           ConstructQueue::iterator item);
+
+namespace {
+struct ConstructDecomposition {
+  ConstructDecomposition(mlir::ModuleOp modOp,
+                         semantics::SemanticsContext &semaCtx,
+                         lower::pft::Evaluation &ev,
+                         llvm::omp::Directive construct,
+                         const List<Clause> &clauses)
+      : semaCtx(semaCtx), mod(modOp), eval(ev) {
+    tomp::ConstructDecompositionT decompose(getOpenMPVersion(modOp), *this,
+                                            construct, llvm::ArrayRef(clauses));
+    output = std::move(decompose.output);
+  }
+
+  // Given an object, return its base object if one exists.
+  std::optional<Object> getBaseObject(const Object &object) {
+    return lower::omp::getBaseObject(object, semaCtx);
+  }
+
+  // Return the iteration variable of the associated loop if any.
+  std::optional<Object> getLoopIterVar() {
+    if (semantics::Symbol *symbol = getIterationVariableSymbol(eval))
+      return Object{symbol, /*designator=*/{}};
+    return std::nullopt;
+  }
+
+  semantics::SemanticsContext &semaCtx;
+  mlir::ModuleOp mod;
+  lower::pft::Evaluation &eval;
+  List<DirectiveWithClauses> output;
+};
+} // namespace
+
+LLVM_DUMP_METHOD static llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os, const DirectiveWithClauses &dwc) {
+  os << llvm::omp::getOpenMPDirectiveName(dwc.id);
+  for (auto [index, clause] : llvm::enumerate(dwc.clauses)) {
+    os << (index == 0 ? '\t' : ' ');
+    os << llvm::omp::getOpenMPClauseName(clause.id);
+  }
+  return os;
+}
+
+static void splitCompoundConstruct(
+    mlir::ModuleOp modOp, Fortran::semantics::SemanticsContext &semaCtx,
+    Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive construct,
+    const List<Clause> &clauses, List<DirectiveWithClauses> &directives) {
+
+  ConstructDecomposition decompose(modOp, semaCtx, eval, construct, clauses);
+  assert(!decompose.output.empty());
+
+  llvm::SmallVector<llvm::omp::Directive> loweringUnits;
+  std::ignore =
+      llvm::omp::getLeafOrCompositeConstructs(construct, loweringUnits);
+
+  int leafIndex = 0;
+  for (llvm::omp::Directive dir_id : loweringUnits) {
+    directives.push_back(DirectiveWithClauses{dir_id});
+    DirectiveWithClauses &dwc = directives.back();
+    llvm::ArrayRef<llvm::omp::Directive> leafsOrSelf =
+        llvm::omp::getLeafConstructsOrSelf(dir_id);
+    for (int i = 0, e = leafsOrSelf.size(); i != e; ++i) {
+      dwc.clauses.append(decompose.output[leafIndex].clauses);
+      ++leafIndex;
+    }
+  }
+}
+
 static fir::GlobalOp globalInitialization(
     Fortran::lower::AbstractConverter &converter,
     fir::FirOpBuilder &firOpBuilder, const Fortran::semantics::Symbol &sym,
@@ -460,81 +544,6 @@ markDeclareTarget(mlir::Operation *op,
   declareTargetOp.setDeclareTarget(deviceType, captureClause);
 }
 
-/// Split a combined directive into an outer leaf directive and the (possibly
-/// combined) rest of the combined directive. Composite directives and
-/// non-compound directives are not split, in which case it will return the
-/// input directive as its first output and an empty value as its second output.
-static std::pair<llvm::omp::Directive, std::optional<llvm::omp::Directive>>
-splitCombinedDirective(llvm::omp::Directive dir) {
-  using D = llvm::omp::Directive;
-  switch (dir) {
-  case D::OMPD_masked_taskloop:
-    return {D::OMPD_masked, D::OMPD_taskloop};
-  case D::OMPD_masked_taskloop_simd:
-    return {D::OMPD_masked, D::OMPD_taskloop_simd};
-  case D::OMPD_master_taskloop:
-    return {D::OMPD_master, D::OMPD_taskloop};
-  case D::OMPD_master_taskloop_simd:
-    return {D::OMPD_master, D::OMPD_taskloop_simd};
-  case D::OMPD_parallel_do:
-    return {D::OMPD_parallel, D::OMPD_do};
-  case D::OMPD_parallel_do_simd:
-    return {D::OMPD_parallel, D::OMPD_do_simd};
-  case D::OMPD_parallel_masked:
-    return {D::OMPD_parallel, D::OMPD_masked};
-  case D::OMPD_parallel_masked_taskloop:
-    return {D::OMPD_parallel, D::OMPD_masked_taskloop};
-  case D::OMPD_parallel_masked_taskloop_simd:
-    return {D::OMPD_parallel, D::OMPD_masked_taskloop_simd};
-  case D::OMPD_parallel_master:
-    return {D::OMPD_parallel, D::OMPD_master};
-  case D::OMPD_parallel_master_taskloop:
-    return {D::OMPD_parallel, D::OMPD_master_taskloop};
-  case D::OMPD_parallel_master_taskloop_simd:
-    return {D::OMPD_parallel, D::OMPD_master_taskloop_simd};
-  case D::OMPD_parallel_sections:
-    return {D::OMPD_parallel, D::OMPD_sections};
-  case D::OMPD_parallel_workshare:
-    return {D::OMPD_parallel, D::OMPD_workshare};
-  case D::OMPD_target_parallel:
-    return {D::OMPD_target, D::OMPD_parallel};
-  case D::OMPD_target_parallel_do:
-    return {D::OMPD_target, D::OMPD_parallel_do};
-  case D::OMPD_target_parallel_do_simd:
-    return {D::OMPD_target, D::OMPD_parallel_do_simd};
-  case D::OMPD_target_simd:
-    return {D::OMPD_target, D::OMPD_simd};
-  case D::OMPD_target_teams:
-    return {D::OMPD_target, D::OMPD_teams};
-  case D::OMPD_target_teams_distribute:
-    return {D::OMPD_target, D::OMPD_teams_distribute};
-  case D::OMPD_target_teams_distribute_parallel_do:
-    return {D::OMPD_target, D::OMPD_teams_distribute_parallel_do};
-  case D::OMPD_target_teams_distribute_parallel_do_simd:
-    return {D::OMPD_target, D::OMPD_teams_distribute_parallel_do_simd};
-  case D::OMPD_target_teams_distribute_simd:
-    return {D::OMPD_target, D::OMPD_teams_distribute_simd};
-  case D::OMPD_teams_distribute:
-    return {D::OMPD_teams, D::OMPD_distribute};
-  case D::OMPD_teams_distribute_parallel_do:
-    return {D::OMPD_teams, D::OMPD_distribute_parallel_do};
-  case D::OMPD_teams_distribute_parallel_do_simd:
-    return {D::OMPD_teams, D::OMPD_distribute_parallel_do_simd};
-  case D::OMPD_teams_distribute_simd:
-    return {D::OMPD_teams, D::OMPD_distribute_simd};
-  case D::OMPD_parallel_loop:
-    return {D::OMPD_parallel, D::OMPD_loop};
-  case D::OMPD_target_parallel_loop:
-    return {D::OMPD_target, D::OMPD_parallel_loop};
-  case D::OMPD_target_teams_loop:
-    return {D::OMPD_target, D::OMPD_teams_loop};
-  case D::OMPD_teams_loop:
-    return {D::OMPD_teams, D::OMPD_loop};
-  default:
-    return {dir, std::nullopt};
-  }
-}
-
 //===----------------------------------------------------------------------===//
 // Op body generation helper structures and functions
 //===----------------------------------------------------------------------===//
@@ -555,11 +564,6 @@ struct OpWithBodyGenInfo {
       : converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc),
         eval(eval), dir(dir) {}
 
-  OpWithBodyGenInfo &setGenNested(bool value) {
-    genNested = value;
-    return *this;
-  }
-
   OpWithBodyGenInfo &setOuterCombined(bool value) {
     outerCombined = value;
     return *this;
@@ -600,8 +604,6 @@ struct OpWithBodyGenInfo {
   Fortran::lower::pft::Evaluation &eval;
   /// [in] leaf directive for which to generate the op body.
   llvm::omp::Directive dir;
-  /// [in] whether to generate FIR for nested evaluations
-  bool genNested = true;
   /// [in] is this an outer operation - prevents privatization.
   bool outerCombined = false;
   /// [in] list of clauses to process.
@@ -622,7 +624,9 @@ struct OpWithBodyGenInfo {
 ///
 /// \param [in]   op - the operation the body belongs to.
 /// \param [in] info - options controlling code-gen for the construction.
-static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) {
+static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
+                           const ConstructQueue &queue,
+                           ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder();
 
   auto insertMarker = [](fir::FirOpBuilder &builder) {
@@ -678,7 +682,10 @@ static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) {
     }
   }
 
-  if (info.genNested) {
+  if (ConstructQueue::iterator next = std::next(item); next != queue.end()) {
+    genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval,
+                   info.loc, queue, next);
+  } else {
     // genFIR(Evaluation&) tries to patch up unterminated blocks, causing
     // a lot of complications for our approach if the terminator generation
     // is delayed past this point. Insert a temporary terminator here, then
@@ -769,11 +776,12 @@ static void genBodyOfTargetDataOp(
     Fortran::lower::AbstractConverter &converter,
     Fortran::lower::SymMap &symTable,
     Fortran::semantics::SemanticsContext &semaCtx,
-    Fortran::lower::pft::Evaluation &eval, bool genNested,
-    mlir::omp::TargetDataOp &dataOp, llvm::ArrayRef<mlir::Type> useDeviceTypes,
+    Fortran::lower::pft::Evaluation &eval, mlir::omp::TargetDataOp &dataOp,
+    llvm::ArrayRef<mlir::Type> useDeviceTypes,
     llvm::ArrayRef<mlir::Location> useDeviceLocs,
     llvm::ArrayRef<const Fortran::semantics::Symbol *> useDeviceSymbols,
-    const mlir::Location &currentLocation) {
+    const mlir::Location &currentLocation, const ConstructQueue &queue,
+    ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   mlir::Region &region = dataOp.getRegion();
 
@@ -826,8 +834,13 @@ static void genBodyOfTargetDataOp(
 
   // Set the insertion point after the marker.
   firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
-  if (genNested)
+
+  if (ConstructQueue::iterator next = std::next(item); next != queue.end()) {
+    genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                   next);
+  } else {
     genNestedEvaluations(converter, eval);
+  }
 }
 
 // This functions creates a block for the body of the targetOp's region. It adds
@@ -836,12 +849,13 @@ static void
 genBodyOfTargetOp(Fortran::lower::AbstractConverter &converter,
                   Fortran::lower::SymMap &symTable,
                   Fortran::semantics::SemanticsContext &semaCtx,
-                  Fortran::lower::pft::Evaluation &eval, bool genNested,
+                  Fortran::lower::pft::Evaluation &eval,
                   mlir::omp::TargetOp &targetOp,
                   llvm::ArrayRef<const Fortran::semantics::Symbol *> mapSyms,
                   llvm::ArrayRef<mlir::Location> mapSymLocs,
                   llvm::ArrayRef<mlir::Type> mapSymTypes,
-                  const mlir::Location &currentLocation) {
+                  const mlir::Location &currentLocation,
+                  const ConstructQueue &queue, ConstructQueue::iterator item) {
   assert(mapSymTypes.size() == mapSymLocs.size());
 
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
@@ -981,15 +995,22 @@ genBodyOfTargetOp(Fortran::lower::AbstractConverter &converter,
 
   // Create the insertion point after the marker.
   firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
-  if (genNested)
+
+  if (ConstructQueue::iterator next = std::next(item); next != queue.end()) {
+    genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                   next);
+  } else {
     genNestedEvaluations(converter, eval);
+  }
 }
 
 template <typename OpTy, typename... Args>
-static OpTy genOpWithBody(OpWithBodyGenInfo &info, Args &&...args) {
+static OpTy genOpWithBody(const OpWithBodyGenInfo &info,
+                          const ConstructQueue &queue,
+                          ConstructQueue::iterator item, Args &&...args) {
   auto op = info.converter.getFirOpBuilder().create<OpTy>(
       info.loc, std::forward<Args>(args)...);
-  createBodyOfOp(*op, info);
+  createBodyOfOp(*op, info, queue, item);
   return op;
 }
 
@@ -1274,7 +1295,8 @@ static mlir::omp::BarrierOp
 genBarrierOp(Fortran::lower::AbstractConverter &converter,
              Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
-             Fortran::lower::pft::Evaluation &eval, mlir::Location loc) {
+             Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+             const ConstructQueue &queue, ConstructQueue::iterator item) {
   return converter.getFirOpBuilder().create<mlir::omp::BarrierOp>(loc);
 }
 
@@ -1282,8 +1304,9 @@ static mlir::omp::CriticalOp
 genCriticalOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
-              Fortran::lower::pft::Evaluation &eval, bool genNested,
-              mlir::Location loc, const List<Clause> &clauses,
+              Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item,
               const std::optional<Fortran::parser::Name> &name) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   mlir::FlatSymbolRefAttr nameAttr;
@@ -1306,17 +1329,17 @@ genCriticalOp(Fortran::lower::AbstractConverter &converter,
 
   return genOpWithBody<mlir::omp::CriticalOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
-                        llvm::omp::Directive::OMPD_critical)
-          .setGenNested(genNested),
-      nameAttr);
+                        llvm::omp::Directive::OMPD_critical),
+      queue, item, nameAttr);
 }
 
 static mlir::omp::DistributeOp
 genDistributeOp(Fortran::lower::AbstractConverter &converter,
                 Fortran::lower::SymMap &symTable,
                 Fortran::semantics::SemanticsContext &semaCtx,
-                Fortran::lower::pft::Evaluation &eval, bool genNested,
-                mlir::Location loc, const List<Clause> &clauses) {
+                Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+                const List<Clause> &clauses, const ConstructQueue &queue,
+                ConstructQueue::iterator item) {
   TODO(loc, "Distribute construct");
   return nullptr;
 }
@@ -1326,7 +1349,8 @@ genFlushOp(Fortran::lower::AbstractConverter &converter,
            Fortran::lower::SymMap &symTable,
            Fortran::semantics::SemanticsContext &semaCtx,
            Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-           const ObjectList &objects, const List<Clause> &clauses) {
+           const ObjectList &objects, const List<Clause> &clauses,
+           const ConstructQueue &queue, ConstructQueue::iterator item) {
   llvm::SmallVector<mlir::Value> operandRange;
   genFlushClauses(converter, semaCtx, objects, clauses, loc, operandRange);
 
@@ -1338,12 +1362,13 @@ static mlir::omp::MasterOp
 genMasterOp(Fortran::lower::AbstractConverter &converter,
             Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
-            Fortran::lower::pft::Evaluation &eval, bool genNested,
-            mlir::Location loc) {
+            Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+            const List<Clause> &clauses, const ConstructQueue &queue,
+            ConstructQueue::iterator item) {
   return genOpWithBody<mlir::omp::MasterOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
-                        llvm::omp::Directive::OMPD_master)
-          .setGenNested(genNested));
+                        llvm::omp::Directive::OMPD_master),
+      queue, item);
 }
 
 static mlir::omp::OrderedOp
@@ -1351,7 +1376,8 @@ genOrderedOp(Fortran::lower::AbstractConverter &converter,
              Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
              Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-             const List<Clause> &clauses) {
+             const List<Clause> &clauses, const ConstructQueue &queue,
+             ConstructQueue::iterator item) {
   TODO(loc, "OMPD_ordered");
   return nullptr;
 }
@@ -1360,25 +1386,25 @@ static mlir::omp::OrderedRegionOp
 genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
                    Fortran::lower::SymMap &symTable,
                    Fortran::semantics::SemanticsContext &semaCtx,
-                   Fortran::lower::pft::Evaluation &eval, bool genNested,
-                   mlir::Location loc, const List<Clause> &clauses) {
+                   Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+                   const List<Clause> &clauses, const ConstructQueue &queue,
+                   ConstructQueue::iterator item) {
   mlir::omp::OrderedRegionClauseOps clauseOps;
   genOrderedRegionClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::OrderedRegionOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
-                        llvm::omp::Directive::OMPD_ordered)
-          .setGenNested(genNested),
-      clauseOps);
+                        llvm::omp::Directive::OMPD_ordered),
+      queue, item, clauseOps);
 }
 
 static mlir::omp::ParallelOp
 genParallelOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
-              Fortran::lower::pft::Evaluation &eval, bool genNested,
-              mlir::Location loc, const List<Clause> &clauses,
-              bool outerCombined = false) {
+              Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item, bool outerCombined = false) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   Fortran::lower::StatementContext stmtCtx;
   mlir::omp::ParallelClauseOps clauseOps;
@@ -1397,14 +1423,14 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
   OpWithBodyGenInfo genInfo =
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_parallel)
-          .setGenNested(genNested)
           .setOuterCombined(outerCombined)
           .setClauses(&clauses)
           .setReductions(&reductionSyms, &reductionTypes)
           .setGenRegionEntryCb(reductionCallback);
 
   if (!enableDelayedPrivatization)
-    return genOpWithBody<mlir::omp::ParallelOp>(genInfo, clauseOps);
+    return genOpWithBody<mlir::omp::ParallelOp>(genInfo, queue, item,
+                                                clauseOps);
 
   bool privatize = !outerCombined;
   DataSharingProcessor dsp(converter, semaCtx, clauses, eval,
@@ -1447,22 +1473,23 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
 
   // TODO Merge with the reduction CB.
   genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp);
-  return genOpWithBody<mlir::omp::ParallelOp>(genInfo, clauseOps);
+  return genOpWithBody<mlir::omp::ParallelOp>(genInfo, queue, item, clauseOps);
 }
 
 static mlir::omp::SectionOp
 genSectionOp(Fortran::lower::AbstractConverter &converter,
              Fortran::lower::SymMap &symTable,
              Fortran::semantics::SemanticsContext &semaCtx,
-             Fortran::lower::pft::Evaluation &eval, bool genNested,
-             mlir::Location loc, const List<Clause> &clauses) {
+             Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+             const List<Clause> &clauses, const ConstructQueue &queue,
+             ConstructQueue::iterator item) {
   // Currently only private/firstprivate clause is handled, and
   // all privatization is done within `omp.section` operations.
   return genOpWithBody<mlir::omp::SectionOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_section)
-          .setGenNested(genNested)
-          .setClauses(&clauses));
+          .setClauses(&clauses),
+      queue, item);
 }
 
 static mlir::omp::SectionsOp
@@ -1470,12 +1497,14 @@ genSectionsOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-              const mlir::omp::SectionsClauseOps &clauseOps) {
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item) {
+  mlir::omp::SectionsClauseOps clauseOps;
+  genSectionsClauses(converter, semaCtx, clauses, loc, clauseOps);
   return genOpWithBody<mlir::omp::SectionsOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
-                        llvm::omp::Directive::OMPD_sections)
-          .setGenNested(false),
-      clauseOps);
+                        llvm::omp::Directive::OMPD_sections),
+      queue, item, clauseOps);
 }
 
 static mlir::omp::SimdOp
@@ -1483,7 +1512,8 @@ genSimdOp(Fortran::lower::AbstractConverter &converter,
           Fortran::lower::SymMap &symTable,
           Fortran::semantics::SemanticsContext &semaCtx,
           Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-          const List<Clause> &clauses) {
+          const List<Clause> &clauses, const ConstructQueue &queue,
+          ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   DataSharingProcessor dsp(converter, semaCtx, clauses, eval);
   dsp.processStep1();
@@ -1518,7 +1548,8 @@ genSimdOp(Fortran::lower::AbstractConverter &converter,
                                    *nestedEval, llvm::omp::Directive::OMPD_simd)
                      .setClauses(&clauses)
                      .setDataSharingProcessor(&dsp)
-                     .setGenRegionEntryCb(ivCallback));
+                     .setGenRegionEntryCb(ivCallback),
+                 queue, item);
 
   return simdOp;
 }
@@ -1527,26 +1558,26 @@ static mlir::omp::SingleOp
 genSingleOp(Fortran::lower::AbstractConverter &converter,
             Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
-            Fortran::lower::pft::Evaluation &eval, bool genNested,
-            mlir::Location loc, const List<Clause> &clauses) {
+            Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+            const List<Clause> &clauses, const ConstructQueue &queue,
+            ConstructQueue::iterator item) {
   mlir::omp::SingleClauseOps clauseOps;
   genSingleClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::SingleOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_single)
-          .setGenNested(genNested)
           .setClauses(&clauses),
-      clauseOps);
+      queue, item, clauseOps);
 }
 
 static mlir::omp::TargetOp
 genTargetOp(Fortran::lower::AbstractConverter &converter,
             Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
-            Fortran::lower::pft::Evaluation &eval, bool genNested,
-            mlir::Location loc, const List<Clause> &clauses,
-            bool outerCombined = false) {
+            Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+            const List<Clause> &clauses, const ConstructQueue &queue,
+            ConstructQueue::iterator item, bool outerCombined = false) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   Fortran::lower::StatementContext stmtCtx;
 
@@ -1650,8 +1681,8 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
   Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap);
 
   auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
-  genBodyOfTargetOp(converter, symTable, semaCtx, eval, genNested, targetOp,
-                    mapSyms, mapLocs, mapTypes, loc);
+  genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, mapSyms,
+                    mapLocs, mapTypes, loc, queue, item);
   return targetOp;
 }
 
@@ -1659,8 +1690,9 @@ static mlir::omp::TargetDataOp
 genTargetDataOp(Fortran::lower::AbstractConverter &converter,
                 Fortran::lower::SymMap &symTable,
                 Fortran::semantics::SemanticsContext &semaCtx,
-                Fortran::lower::pft::Evaluation &eval, bool genNested,
-                mlir::Location loc, const List<Clause> &clauses) {
+                Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+                const List<Clause> &clauses, const ConstructQueue &queue,
+                ConstructQueue::iterator item) {
   Fortran::lower::StatementContext stmtCtx;
   mlir::omp::TargetDataClauseOps clauseOps;
   llvm::SmallVector<mlir::Type> useDeviceTypes;
@@ -1672,9 +1704,9 @@ genTargetDataOp(Fortran::lower::AbstractConverter &converter,
   auto targetDataOp =
       converter.getFirOpBuilder().create<mlir::omp::TargetDataOp>(loc,
                                                                   clauseOps);
-  genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, genNested,
-                        targetDataOp, useDeviceTypes, useDeviceLocs,
-                        useDeviceSyms, loc);
+  genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, targetDataOp,
+                        useDeviceTypes, useDeviceLocs, useDeviceSyms, loc,
+                        queue, item);
   return targetDataOp;
 }
 
@@ -1683,8 +1715,9 @@ static OpTy
 genTargetEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter,
                                Fortran::lower::SymMap &symTable,
                                Fortran::semantics::SemanticsContext &semaCtx,
-                               mlir::Location loc,
-                               const List<Clause> &clauses) {
+                               mlir::Location loc, const List<Clause> &clauses,
+                               const ConstructQueue &queue,
+                               ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   Fortran::lower::StatementContext stmtCtx;
 
@@ -1711,8 +1744,9 @@ static mlir::omp::TaskOp
 genTaskOp(Fortran::lower::AbstractConverter &converter,
           Fortran::lower::SymMap &symTable,
           Fortran::semantics::SemanticsContext &semaCtx,
-          Fortran::lower::pft::Evaluation &eval, bool genNested,
-          mlir::Location loc, const List<Clause> &clauses) {
+          Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+          const List<Clause> &clauses, const ConstructQueue &queue,
+          ConstructQueue::iterator item) {
   Fortran::lower::StatementContext stmtCtx;
   mlir::omp::TaskClauseOps clauseOps;
   genTaskClauses(converter, semaCtx, stmtCtx, clauses, loc, clauseOps);
@@ -1720,26 +1754,25 @@ genTaskOp(Fortran::lower::AbstractConverter &converter,
   return genOpWithBody<mlir::omp::TaskOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_task)
-          .setGenNested(genNested)
           .setClauses(&clauses),
-      clauseOps);
+      queue, item, clauseOps);
 }
 
 static mlir::omp::TaskgroupOp
 genTaskgroupOp(Fortran::lower::AbstractConverter &converter,
                Fortran::lower::SymMap &symTable,
                Fortran::semantics::SemanticsContext &semaCtx,
-               Fortran::lower::pft::Evaluation &eval, bool genNested,
-               mlir::Location loc, const List<Clause> &clauses) {
+               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+               const List<Clause> &clauses, const ConstructQueue &queue,
+               ConstructQueue::iterator item) {
   mlir::omp::TaskgroupClauseOps clauseOps;
   genTaskgroupClauses(converter, semaCtx, clauses, loc, clauseOps);
 
   return genOpWithBody<mlir::omp::TaskgroupOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_taskgroup)
-          .setGenNested(genNested)
           .setClauses(&clauses),
-      clauseOps);
+      queue, item, clauseOps);
 }
 
 static mlir::omp::TaskloopOp
@@ -1747,7 +1780,8 @@ genTaskloopOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-              const List<Clause> &clauses) {
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item) {
   TODO(loc, "Taskloop construct");
 }
 
@@ -1756,7 +1790,8 @@ genTaskwaitOp(Fortran::lower::AbstractConverter &converter,
               Fortran::lower::SymMap &symTable,
               Fortran::semantics::SemanticsContext &semaCtx,
               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-              const List<Clause> &clauses) {
+              const List<Clause> &clauses, const ConstructQueue &queue,
+              ConstructQueue::iterator item) {
   mlir::omp::TaskwaitClauseOps clauseOps;
   genTaskwaitClauses(converter, semaCtx, clauses, loc, clauseOps);
   return converter.getFirOpBuilder().create<mlir::omp::TaskwaitOp>(loc,
@@ -1767,7 +1802,8 @@ static mlir::omp::TaskyieldOp
 genTaskyieldOp(Fortran::lower::AbstractConverter &converter,
                Fortran::lower::SymMap &symTable,
                Fortran::semantics::SemanticsContext &semaCtx,
-               Fortran::lower::pft::Evaluation &eval, mlir::Location loc) {
+               Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+               const ConstructQueue &queue, ConstructQueue::iterator item) {
   return converter.getFirOpBuilder().create<mlir::omp::TaskyieldOp>(loc);
 }
 
@@ -1775,9 +1811,9 @@ static mlir::omp::TeamsOp
 genTeamsOp(Fortran::lower::AbstractConverter &converter,
            Fortran::lower::SymMap &symTable,
            Fortran::semantics::SemanticsContext &semaCtx,
-           Fortran::lower::pft::Evaluation &eval, bool genNested,
-           mlir::Location loc, const List<Clause> &clauses,
-           bool outerCombined = false) {
+           Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+           const List<Clause> &clauses, const ConstructQueue &queue,
+           ConstructQueue::iterator item, bool outerCombined = false) {
   Fortran::lower::StatementContext stmtCtx;
   mlir::omp::TeamsClauseOps clauseOps;
   genTeamsClauses(converter, semaCtx, stmtCtx, clauses, loc, clauseOps);
@@ -1785,10 +1821,9 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter,
   return genOpWithBody<mlir::omp::TeamsOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
                         llvm::omp::Directive::OMPD_teams)
-          .setGenNested(genNested)
           .setOuterCombined(outerCombined)
           .setClauses(&clauses),
-      clauseOps);
+      queue, item, clauseOps);
 }
 
 static mlir::omp::WsloopOp
@@ -1796,7 +1831,8 @@ genWsloopOp(Fortran::lower::AbstractConverter &converter,
             Fortran::lower::SymMap &symTable,
             Fortran::semantics::SemanticsContext &semaCtx,
             Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
-            const List<Clause> &clauses) {
+            const List<Clause> &clauses, const ConstructQueue &queue,
+            ConstructQueue::iterator item) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   DataSharingProcessor dsp(converter, semaCtx, clauses, eval);
   dsp.processStep1();
@@ -1837,7 +1873,8 @@ genWsloopOp(Fortran::lower::AbstractConverter &converter,
                      .setClauses(&clauses)
                      .setDataSharingProcessor(&dsp)
                      .setReductions(&reductionSyms, &reductionTypes)
-                     .setGenRegionEntryCb(ivCallback));
+                     .setGenRegionEntryCb(ivCallback),
+                 queue, item);
   return wsloopOp;
 }
 
@@ -1845,13 +1882,13 @@ genWsloopOp(Fortran::lower::AbstractConverter &converter,
 // Code generation functions for composite constructs
 //===----------------------------------------------------------------------===//
 
-static void
-genCompositeDistributeParallelDo(Fortran::lower::AbstractConverter &converter,
-                                 Fortran::lower::SymMap &symTable,
-                                 Fortran::semantics::SemanticsContext &semaCtx,
-                                 Fortran::lower::pft::Evaluation &eval,
-                                 const List<Clause> &clauses,
-                                 mlir::Location loc) {
+static void genCompositeDistributeParallelDo(
+    Fortran::lower::AbstractConverter &converter,
+    Fortran::lower::SymMap &symTable,
+    Fortran::semantics::SemanticsContext &semaCtx,
+    Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+    const List<Clause> &clauses, const ConstructQueue &queue,
+    ConstructQueue::iterator item) {
   TODO(loc, "Composite DISTRIBUTE PARALLEL DO");
 }
 
@@ -1859,8 +1896,9 @@ static void genCompositeDistributeParallelDoSimd(
     Fortran::lower::AbstractConverter &converter,
     Fortran::lower::SymMap &symTable,
     Fortran::semantics::SemanticsContext &semaCtx,
-    Fortran::lower::pft::Evaluation &eval, const List<Clause> &clauses,
-    mlir::Location loc) {
+    Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+    const List<Clause> &clauses, const ConstructQueue &queue,
+    ConstructQueue::iterator item) {
   TODO(loc, "Composite DISTRIBUTE PARALLEL DO SIMD");
 }
 
@@ -1869,7 +1907,9 @@ genCompositeDistributeSimd(Fortran::lower::AbstractConverter &converter,
                            Fortran::lower::SymMap &symTable,
                            Fortran::semantics::SemanticsContext &semaCtx,
                            Fortran::lower::pft::Evaluation &eval,
-                           const List<Clause> &clauses, mlir::Location loc) {
+                           mlir::Location loc, const List<Clause> &clauses,
+                           const ConstructQueue &queue,
+                           ConstructQueue::iterator item) {
   TODO(loc, "Composite DISTRIBUTE SIMD");
 }
 
@@ -1877,8 +1917,9 @@ static void genCompositeDoSimd(Fortran::lower::AbstractConverter &converter,
                                Fortran::lower::SymMap &symTable,
                                Fortran::semantics::SemanticsContext &semaCtx,
                                Fortran::lower::pft::Evaluation &eval,
-                               const List<Clause> &clauses,
-                               mlir::Location loc) {
+                               mlir::Location loc, const List<Clause> &clauses,
+                               const ConstructQueue &queue,
+                               ConstructQueue::iterator item) {
   ClauseProcessor cp(converter, semaCtx, clauses);
   cp.processTODO<clause::Aligned, clause::Allocate, clause::Linear,
                  clause::Order, clause::Safelen, clause::Simdlen>(
@@ -1891,7 +1932,7 @@ static void genCompositeDoSimd(Fortran::lower::AbstractConverter &converter,
   // When support for vectorization is enabled, then we need to add handling of
   // if clause. Currently if clause can be skipped because we always assume
   // SIMD length = 1.
-  genWsloopOp(converter, symTable, semaCtx, eval, loc, clauses);
+  genWsloopOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
 }
 
 static void
@@ -1899,10 +1940,126 @@ genCompositeTaskloopSimd(Fortran::lower::AbstractConverter &converter,
                          Fortran::lower::SymMap &symTable,
                          Fortran::semantics::SemanticsContext &semaCtx,
                          Fortran::lower::pft::Evaluation &eval,
-                         const List<Clause> &clauses, mlir::Location loc) {
+                         mlir::Location loc, const List<Clause> &clauses,
+                         const ConstructQueue &queue,
+                         ConstructQueue::iterator item) {
   TODO(loc, "Composite TASKLOOP SIMD");
 }
 
+//===----------------------------------------------------------------------===//
+// Dispatch
+//===----------------------------------------------------------------------===//
+
+static void genOMPDispatch(Fortran::lower::AbstractConverter &converter,
+                           Fortran::lower::SymMap &symTable,
+                           Fortran::semantics::SemanticsContext &semaCtx,
+                           Fortran::lower::pft::Evaluation &eval,
+                           mlir::Location loc, const ConstructQueue &queue,
+                           ConstructQueue::iterator item) {
+  assert(item != queue.end());
+  const List<Clause> &clauses = item->clauses;
+
+  switch (llvm::omp::Directive dir = item->id) {
+  case llvm::omp::Directive::OMPD_distribute:
+    genDistributeOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                    item);
+    break;
+  case llvm::omp::Directive::OMPD_do:
+    genWsloopOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_loop:
+  case llvm::omp::Directive::OMPD_masked:
+    TODO(loc, "Unhandled loop directive (" +
+                  llvm::omp::getOpenMPDirectiveName(dir) + ")");
+    break;
+  case llvm::omp::Directive::OMPD_master:
+    genMasterOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_ordered:
+    genOrderedRegionOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                       item);
+    break;
+  case llvm::omp::Directive::OMPD_parallel:
+    genParallelOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item,
+                  /*outerCombined=*/false);
+    break;
+  case llvm::omp::Directive::OMPD_sections:
+    genSectionsOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                  item);
+    break;
+  case llvm::omp::Directive::OMPD_simd:
+    genSimdOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_single:
+    genSingleOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_target:
+    genTargetOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item,
+                /*outerCombined=*/false);
+    break;
+  case llvm::omp::Directive::OMPD_target_data:
+    genTargetDataOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                    item);
+    break;
+  case llvm::omp::Directive::OMPD_target_enter_data:
+    genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>(
+        converter, symTable, semaCtx, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_target_exit_data:
+    genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>(
+        converter, symTable, semaCtx, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_target_update:
+    genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>(
+        converter, symTable, semaCtx, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_task:
+    genTaskOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_taskgroup:
+    genTaskgroupOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                   item);
+    break;
+  case llvm::omp::Directive::OMPD_taskloop:
+    genTaskloopOp(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                  item);
+    break;
+  case llvm::omp::Directive::OMPD_teams:
+    genTeamsOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  // case llvm::omp::Directive::OMPD_workdistribute:
+  case llvm::omp::Directive::OMPD_workshare:
+    // FIXME: Workshare is not a commonly used OpenMP construct, an
+    // implementation for this feature will come later. For the codes
+    // that use this construct, add a single construct for now.
+    genSingleOp(converter, symTable, semaCtx, eval, loc, clauses, queue, item);
+    break;
+  // Composite constructs
+  case llvm::omp::Directive::OMPD_distribute_parallel_do:
+    genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc,
+                                     clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_distribute_parallel_do_simd:
+    genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval,
+                                         loc, clauses, queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_distribute_simd:
+    genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, clauses,
+                               queue, item);
+    break;
+  case llvm::omp::Directive::OMPD_do_simd:
+    genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, clauses, queue,
+                       item);
+    break;
+  case llvm::omp::Directive::OMPD_taskloop_simd:
+    genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, clauses,
+                             queue, item);
+    break;
+  default:
+    break;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // OpenMPDeclarativeConstruct visitors
 //===----------------------------------------------------------------------===//
@@ -2013,36 +2170,45 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
       semaCtx);
   mlir::Location currentLocation = converter.genLocation(directive.source);
 
+  ConstructQueue queue{{DirectiveWithClauses{directive.v, clauses}}};
+
   switch (directive.v) {
   default:
     break;
   case llvm::omp::Directive::OMPD_barrier:
-    genBarrierOp(converter, symTable, semaCtx, eval, currentLocation);
+    genBarrierOp(converter, symTable, semaCtx, eval, currentLocation, queue,
+                 queue.begin());
     break;
   case llvm::omp::Directive::OMPD_taskwait:
-    genTaskwaitOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
+    genTaskwaitOp(converter, symTable, semaCtx, eval, currentLocation, clauses,
+                  queue, queue.begin());
     break;
   case llvm::omp::Directive::OMPD_taskyield:
-    genTaskyieldOp(converter, symTable, semaCtx, eval, currentLocation);
+    genTaskyieldOp(converter, symTable, semaCtx, eval, currentLocation, queue,
+                   queue.begin());
     break;
   case llvm::omp::Directive::OMPD_target_data:
-    genTargetDataOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
-                    currentLocation, clauses);
+    genTargetDataOp(converter, symTable, semaCtx, eval, currentLocation,
+                    clauses, queue, queue.begin());
     break;
   case llvm::omp::Directive::OMPD_target_enter_data:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>(
-        converter, symTable, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses, queue,
+        queue.begin());
     break;
   case llvm::omp::Directive::OMPD_target_exit_data:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>(
-        converter, symTable, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses, queue,
+        queue.begin());
     break;
   case llvm::omp::Directive::OMPD_target_update:
     genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>(
-        converter, symTable, semaCtx, currentLocation, clauses);
+        converter, symTable, semaCtx, currentLocation, clauses, queue,
+        queue.begin());
     break;
   case llvm::omp::Directive::OMPD_ordered:
-    genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
+    genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, clauses,
+                 queue, queue.begin());
     break;
   }
 }
@@ -2066,8 +2232,11 @@ genOMP(Fortran::lower::AbstractConverter &converter,
                             [&](auto &&s) { return makeClause(s.v, semaCtx); })
                  : List<Clause>{};
   mlir::Location currentLocation = converter.genLocation(verbatim.source);
+
+  ConstructQueue queue{
+      DirectiveWithClauses{llvm::omp::Directive::OMPD_flush, clauses}};
   genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects,
-             clauses);
+             clauses, queue, queue.begin());
 }
 
 static void
@@ -2210,75 +2379,13 @@ genOMP(Fortran::lower::AbstractConverter &converter,
     }
   }
 
-  std::optional<llvm::omp::Directive> nextDir = origDirective;
-  bool outermostLeafConstruct = true;
-  while (nextDir) {
-    llvm::omp::Directive leafDir;
-    std::tie(leafDir, nextDir) = splitCombinedDirective(*nextDir);
-    const bool genNested = !nextDir;
-    const bool outerCombined = outermostLeafConstruct && nextDir.has_value();
-    switch (leafDir) {
-    case llvm::omp::Directive::OMPD_master:
-      // 2.16 MASTER construct.
-      genMasterOp(converter, symTable, semaCtx, eval, genNested,
-                  currentLocation);
-      break;
-    case llvm::omp::Directive::OMPD_ordered:
-      // 2.17.9 ORDERED construct.
-      genOrderedRegionOp(converter, symTable, semaCtx, eval, genNested,
-                         currentLocation, clauses);
-      break;
-    case llvm::omp::Directive::OMPD_parallel:
-      // 2.6 PARALLEL construct.
-      genParallelOp(converter, symTable, semaCtx, eval, genNested,
-                    currentLocation, clauses, outerCombined);
-      break;
-    case llvm::omp::Directive::OMPD_single:
-      // 2.8.2 SINGLE construct.
-      genSingleOp(converter, symTable, semaCtx, eval, genNested,
-                  currentLocation, clauses);
-      break;
-    case llvm::omp::Directive::OMPD_target:
-      // 2.12.5 TARGET construct.
-      genTargetOp(converter, symTable, semaCtx, eval, genNested,
-                  currentLocation, clauses, outerCombined);
-      break;
-    case llvm::omp::Directive::OMPD_target_data:
-      // 2.12.2 TARGET DATA construct.
-      genTargetDataOp(converter, symTable, semaCtx, eval, genNested,
-                      currentLocation, clauses);
-      break;
-    case llvm::omp::Directive::OMPD_task:
-      // 2.10.1 TASK construct.
-      genTaskOp(converter, symTable, semaCtx, eval, genNested, currentLocation,
-                clauses);
-      break;
-    case llvm::omp::Directive::OMPD_taskgroup:
-      // 2.17.6 TASKGROUP construct.
-      genTaskgroupOp(converter, symTable, semaCtx, eval, genNested,
-                     currentLocation, clauses);
-      break;
-    case llvm::omp::Directive::OMPD_teams:
-      // 2.7 TEAMS construct.
-      // FIXME Pass the outerCombined argument or rename it to better describe
-      // what it represents if it must always be `false` in this context.
-      genTeamsOp(converter, symTable, semaCtx, eval, genNested, currentLocation,
-                 clauses);
-      break;
-    case llvm::omp::Directive::OMPD_workshare:
-      // 2.8.3 WORKSHARE construct.
-      // FIXME: Workshare is not a commonly used OpenMP construct, an
-      // implementation for this feature will come later. For the codes
-      // that use this construct, add a single construct for now.
-      genSingleOp(converter, symTable, semaCtx, eval, genNested,
-                  currentLocation, clauses);
-      break;
-    default:
-      llvm_unreachable("Unexpected block construct");
-      break;
-    }
-    outermostLeafConstruct = false;
-  }
+  llvm::omp::Directive directive =
+      std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v;
+  ConstructQueue queue;
+  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
+                         directive, clauses, queue);
+  genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                 queue.begin());
 }
 
 static void
@@ -2291,10 +2398,15 @@ genOMP(Fortran::lower::AbstractConverter &converter,
       std::get<Fortran::parser::OmpCriticalDirective>(criticalConstruct.t);
   List<Clause> clauses =
       makeClauses(std::get<Fortran::parser::OmpClauseList>(cd.t), semaCtx);
+
+  ConstructQueue queue;
+  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
+                         llvm::omp::Directive::OMPD_critical, clauses, queue);
+
   const auto &name = std::get<std::optional<Fortran::parser::Name>>(cd.t);
   mlir::Location currentLocation = converter.getCurrentLocation();
-  genCriticalOp(converter, symTable, semaCtx, eval, /*genNested=*/true,
-                currentLocation, clauses, name);
+  genCriticalOp(converter, symTable, semaCtx, eval, currentLocation, clauses,
+                queue, queue.begin(), name);
 }
 
 static void
@@ -2315,14 +2427,6 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
       std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t);
   List<Clause> clauses = makeClauses(
       std::get<Fortran::parser::OmpClauseList>(beginLoopDirective.t), semaCtx);
-  mlir::Location currentLocation =
-      converter.genLocation(beginLoopDirective.source);
-  const auto origDirective =
-      std::get<Fortran::parser::OmpLoopDirective>(beginLoopDirective.t).v;
-
-  assert(llvm::omp::loopConstructSet.test(origDirective) &&
-         "Expected loop construct");
-
   if (auto &endLoopDirective =
           std::get<std::optional<Fortran::parser::OmpEndLoopDirective>>(
               loopConstruct.t)) {
@@ -2331,101 +2435,16 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
         semaCtx));
   }
 
-  std::optional<llvm::omp::Directive> nextDir = origDirective;
-  while (nextDir) {
-    llvm::omp::Directive leafDir;
-    std::tie(leafDir, nextDir) = splitCombinedDirective(*nextDir);
-    if (llvm::omp::compositeConstructSet.test(leafDir)) {
-      assert(!nextDir && "Composite construct cannot be split");
-      switch (leafDir) {
-      case llvm::omp::Directive::OMPD_distribute_parallel_do:
-        // 2.9.4.3 DISTRIBUTE PARALLEL Worksharing-Loop construct.
-        genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval,
-                                         clauses, currentLocation);
-        break;
-      case llvm::omp::Directive::OMPD_distribute_parallel_do_simd:
-        // 2.9.4.4 DISTRIBUTE PARALLEL Worksharing-Loop SIMD construct.
-        genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval,
-                                             clauses, currentLocation);
-        break;
-      case llvm::omp::Directive::OMPD_distribute_simd:
-        // 2.9.4.2 DISTRIBUTE SIMD construct.
-        genCompositeDistributeSimd(converter, symTable, semaCtx, eval, clauses,
-                                   currentLocation);
-        break;
-      case llvm::omp::Directive::OMPD_do_simd:
-        // 2.9.3.2 Worksharing-Loop SIMD construct.
-        genCompositeDoSimd(converter, symTable, semaCtx, eval, clauses,
-                           currentLocation);
-        break;
-      case llvm::omp::Directive::OMPD_taskloop_simd:
-        // 2.10.3 TASKLOOP SIMD construct.
-        genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, clauses,
-                                 currentLocation);
-        break;
-      default:
-        llvm_unreachable("Unexpected composite construct");
-      }
-    } else {
-      const bool genNested = !nextDir;
-      switch (leafDir) {
-      case llvm::omp::Directive::OMPD_distribute:
-        // 2.9.4.1 DISTRIBUTE construct.
-        genDistributeOp(converter, symTable, semaCtx, eval, genNested,
-                        currentLocation, clauses);
-        break;
-      case llvm::omp::Directive::OMPD_do:
-        // 2.9.2 Worksharing-Loop construct.
-        genWsloopOp(converter, symTable, semaCtx, eval, currentLocation,
-                    clauses);
-        break;
-      case llvm::omp::Directive::OMPD_parallel:
-        // 2.6 PARALLEL construct.
-        // FIXME This is not necessarily always the outer leaf construct of a
-        // combined construct in this constext (e.g. distribute parallel do).
-        // Maybe rename the argument if it represents something else or
-        // initialize it properly.
-        genParallelOp(converter, symTable, semaCtx, eval, genNested,
-                      currentLocation, clauses,
-                      /*outerCombined=*/true);
-        break;
-      case llvm::omp::Directive::OMPD_simd:
-        // 2.9.3.1 SIMD construct.
-        genSimdOp(converter, symTable, semaCtx, eval, currentLocation, clauses);
-        break;
-      case llvm::omp::Directive::OMPD_target:
-        // 2.12.5 TARGET construct.
-        genTargetOp(converter, symTable, semaCtx, eval, genNested,
-                    currentLocation, clauses, /*outerCombined=*/true);
-        break;
-      case llvm::omp::Directive::OMPD_taskloop:
-        // 2.10.2 TASKLOOP construct.
-        genTaskloopOp(converter, symTable, semaCtx, eval, currentLocation,
-                      clauses);
-        break;
-      case llvm::omp::Directive::OMPD_teams:
-        // 2.7 TEAMS construct.
-        // FIXME This is not necessarily always the outer leaf construct of a
-        // combined construct in this constext (e.g. target teams distribute).
-        // Maybe rename the argument if it represents something else or
-        // initialize it properly.
-        genTeamsOp(converter, symTable, semaCtx, eval, genNested,
-                   currentLocation, clauses, /*outerCombined=*/true);
-        break;
-      case llvm::omp::Directive::OMPD_loop:
-      case llvm::omp::Directive::OMPD_masked:
-      case llvm::omp::Directive::OMPD_master:
-      case llvm::omp::Directive::OMPD_tile:
-      case llvm::omp::Directive::OMPD_unroll:
-        TODO(currentLocation, "Unhandled loop directive (" +
-                                  llvm::omp::getOpenMPDirectiveName(leafDir) +
-                                  ")");
-        break;
-      default:
-        llvm_unreachable("Unexpected loop construct");
-      }
-    }
-  }
+  mlir::Location currentLocation =
+      converter.genLocation(beginLoopDirective.source);
+
+  llvm::omp::Directive directive =
+      std::get<parser::OmpLoopDirective>(beginLoopDirective.t).v;
+  ConstructQueue queue;
+  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
+                         directive, clauses, queue);
+  genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                 queue.begin());
 }
 
 static void
@@ -2434,8 +2453,11 @@ genOMP(Fortran::lower::AbstractConverter &converter,
        Fortran::semantics::SemanticsContext &semaCtx,
        Fortran::lower::pft::Evaluation &eval,
        const Fortran::parser::OpenMPSectionConstruct &sectionConstruct) {
-  // SECTION constructs are handled as a part of SECTIONS.
-  llvm_unreachable("Unexpected standalone OMP SECTION");
+  mlir::Location loc = converter.getCurrentLocation();
+  ConstructQueue queue{
+      DirectiveWithClauses{llvm::omp::Directive::OMPD_section}};
+  genSectionOp(converter, symTable, semaCtx, eval, loc,
+               /*clauses=*/{}, queue, queue.begin());
 }
 
 static void
@@ -2454,39 +2476,15 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   clauses.append(makeClauses(
       std::get<Fortran::parser::OmpClauseList>(endSectionsDirective.t),
       semaCtx));
-
-  // Process clauses before optional omp.parallel, so that new variables are
-  // allocated outside of the parallel region
   mlir::Location currentLocation = converter.getCurrentLocation();
-  mlir::omp::SectionsClauseOps clauseOps;
-  genSectionsClauses(converter, semaCtx, clauses, currentLocation, clauseOps);
-
-  // Parallel wrapper of PARALLEL SECTIONS construct
-  llvm::omp::Directive dir =
-      std::get<Fortran::parser::OmpSectionsDirective>(beginSectionsDirective.t)
-          .v;
-  if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
-    genParallelOp(converter, symTable, semaCtx, eval,
-                  /*genNested=*/false, currentLocation, clauses,
-                  /*outerCombined=*/true);
-  }
 
-  // SECTIONS construct.
-  genSectionsOp(converter, symTable, semaCtx, eval, currentLocation, clauseOps);
-
-  // Generate nested SECTION operations recursively.
-  const auto &sectionBlocks =
-      std::get<Fortran::parser::OmpSectionBlocks>(sectionsConstruct.t);
-  auto &firOpBuilder = converter.getFirOpBuilder();
-  auto ip = firOpBuilder.saveInsertionPoint();
-  for (const auto &[nblock, neval] :
-       llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) {
-    symTable.pushScope();
-    genSectionOp(converter, symTable, semaCtx, neval, /*genNested=*/true,
-                 currentLocation, clauses);
-    symTable.popScope();
-    firOpBuilder.restoreInsertionPoint(ip);
-  }
+  llvm::omp::Directive directive =
+      std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).v;
+  ConstructQueue queue;
+  splitCompoundConstruct(converter.getFirOpBuilder().getModule(), semaCtx, eval,
+                         directive, clauses, queue);
+  genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
+                 queue.begin());
 }
 
 static void genOMP(Fortran::lower::AbstractConverter &converter,
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
new file mode 100644
index 00000000000000..f8a18a1bed7ae6
--- /dev/null
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -0,0 +1,985 @@
+#ifndef LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
+#define LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Frontend/OpenMP/ClauseT.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
+
+#include <iterator>
+#include <list>
+#include <optional>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <variant>
+
+static inline llvm::ArrayRef<llvm::omp::Directive> getWorksharing() {
+  static llvm::omp::Directive worksharing[] = {
+      llvm::omp::Directive::OMPD_do,     llvm::omp::Directive::OMPD_for,
+      llvm::omp::Directive::OMPD_scope,  llvm::omp::Directive::OMPD_sections,
+      llvm::omp::Directive::OMPD_single, llvm::omp::Directive::OMPD_workshare,
+  };
+  return worksharing;
+}
+
+static inline llvm::ArrayRef<llvm::omp::Directive> getWorksharingLoop() {
+  static llvm::omp::Directive worksharingLoop[] = {
+      llvm::omp::Directive::OMPD_do,
+      llvm::omp::Directive::OMPD_for,
+  };
+  return worksharingLoop;
+}
+
+namespace detail {
+template <typename Container, typename Predicate>
+typename std::remove_reference_t<Container>::iterator
+find_unique(Container &&container, Predicate &&pred) {
+  auto first = std::find_if(container.begin(), container.end(), pred);
+  if (first == container.end())
+    return first;
+  auto second = std::find_if(std::next(first), container.end(), pred);
+  if (second == container.end())
+    return first;
+  return container.end();
+}
+} // namespace detail
+
+namespace tomp {
+
+template <typename ClauseType> struct DirectiveWithClauses {
+  llvm::omp::Directive id = llvm::omp::Directive::OMPD_unknown;
+  tomp::type::ListT<ClauseType> clauses;
+};
+
+template <typename ClauseType, typename HelperType>
+struct ConstructDecompositionT {
+  using ClauseTy = ClauseType;
+
+  using TypeTy = typename ClauseTy::TypeTy;
+  using IdTy = typename ClauseTy::IdTy;
+  using ExprTy = typename ClauseTy::ExprTy;
+  using HelperTy = HelperType;
+  using ObjectTy = tomp::ObjectT<IdTy, ExprTy>;
+
+  using ClauseSet = llvm::DenseSet<const ClauseTy *>;
+
+  ConstructDecompositionT(uint32_t ver, HelperType &hlp,
+                          llvm::omp::Directive dir,
+                          llvm::ArrayRef<ClauseTy> clauses)
+      : version(ver), construct(dir), helper(hlp) {
+    for (const ClauseTy &clause : clauses)
+      nodes.push_back(&clause);
+
+    bool success = split();
+    if (success) {
+      // Copy the broken down directives with their clauses to the
+      // output list. Copy by value, since we don't own the storage
+      // with the input clauses, and the internal representation uses
+      // clause addresses.
+      for (auto &leaf : leafs) {
+        output.push_back({leaf.id});
+        auto &dwc = output.back();
+        for (const ClauseTy *c : leaf.clauses)
+          dwc.clauses.push_back(*c);
+      }
+    }
+  }
+
+  tomp::ListT<DirectiveWithClauses<ClauseType>> output;
+
+private:
+  bool split();
+
+  struct LeafReprInternal {
+    llvm::omp::Directive id = llvm::omp::Directive::OMPD_unknown;
+    tomp::type::ListT<const ClauseTy *> clauses;
+  };
+
+  LeafReprInternal *findDirective(llvm::omp::Directive dirId) {
+    auto found = llvm::find_if(
+        leafs, [&](const LeafReprInternal &leaf) { return leaf.id == dirId; });
+    return found != leafs.end() ? &*found : nullptr;
+  }
+
+  ClauseSet *findClausesWith(const ObjectTy &object) {
+    if (auto found = syms.find(object.id()); found != syms.end())
+      return &found->second;
+    return nullptr;
+  }
+
+  template <typename S>
+  ClauseTy *makeClause(llvm::omp::Clause clauseId, S &&specific) {
+    implicit.push_back(ClauseTy{clauseId, std::forward<S>(specific)});
+    return &implicit.back(); // std::list keeps this address stable.
+  }
+
+  void addClauseSymsToMap(const ObjectTy &object, const ClauseTy *);
+  void addClauseSymsToMap(const tomp::ObjectListT<IdTy, ExprTy> &objects,
+                          const ClauseTy *);
+  void addClauseSymsToMap(const TypeTy &item, const ClauseTy *);
+  void addClauseSymsToMap(const ExprTy &item, const ClauseTy *);
+  void addClauseSymsToMap(const tomp::clause::MapT<TypeTy, IdTy, ExprTy> &item,
+                          const ClauseTy *);
+
+  template <typename U>
+  void addClauseSymsToMap(const std::optional<U> &item, const ClauseTy *);
+  template <typename U>
+  void addClauseSymsToMap(const tomp::ListT<U> &item, const ClauseTy *);
+  template <typename... U, size_t... Is>
+  void addClauseSymsToMap(const std::tuple<U...> &item, const ClauseTy *,
+                          std::index_sequence<Is...> = {});
+  template <typename U>
+  std::enable_if_t<std::is_enum_v<llvm::remove_cvref_t<U>>, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::EmptyTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::IncompleteTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::WrapperTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::TupleTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  template <typename U>
+  std::enable_if_t<llvm::remove_cvref_t<U>::UnionTrait::value, void>
+  addClauseSymsToMap(U &&item, const ClauseTy *);
+
+  // Apply a clause to the only directive that allows it. If there are no
+  // directives that allow it, or if there is more than one, do not apply
+  // anything and return false, otherwise return true.
+  bool applyToUnique(const ClauseTy *node);
+
+  // Apply a clause to the first directive in given range that allows it.
+  // If such a directive does not exist, return false, otherwise return true.
+  template <typename Iterator>
+  bool applyToFirst(const ClauseTy *node, llvm::iterator_range<Iterator> range);
+
+  // Apply a clause to the innermost directive that allows it. If such a
+  // directive does not exist, return false, otherwise return true.
+  bool applyToInnermost(const ClauseTy *node);
+
+  // Apply a clause to the outermost directive that allows it. If such a
+  // directive does not exist, return false, otherwise return true.
+  bool applyToOutermost(const ClauseTy *node);
+
+  template <typename Predicate>
+  bool applyIf(const ClauseTy *node, Predicate shouldApply);
+
+  bool applyToAll(const ClauseTy *node);
+
+  template <typename Clause>
+  bool applyClause(Clause &&clause, const ClauseTy *node);
+
+  bool applyClause(const tomp::clause::CollapseT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::PrivateT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool
+  applyClause(const tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy> &clause,
+              const ClauseTy *);
+  bool
+  applyClause(const tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy> &clause,
+              const ClauseTy *);
+  bool applyClause(const tomp::clause::SharedT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::DefaultT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool
+  applyClause(const tomp::clause::ThreadLimitT<TypeTy, IdTy, ExprTy> &clause,
+              const ClauseTy *);
+  bool applyClause(const tomp::clause::OrderT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::AllocateT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::ReductionT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::IfT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::LinearT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+  bool applyClause(const tomp::clause::NowaitT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
+
+  uint32_t version;
+  llvm::omp::Directive construct;
+  HelperType &helper;
+  ListT<LeafReprInternal> leafs;
+  tomp::ListT<const ClauseTy *> nodes;
+  std::list<ClauseTy> implicit; // Container for materialized implicit clauses.
+                                // Inserting must preserve element addresses.
+  llvm::DenseMap<IdTy, ClauseSet> syms;
+  llvm::DenseSet<IdTy> mapBases;
+};
+
+// Deduction guide
+template <typename ClauseType, typename HelperType>
+ConstructDecompositionT(uint32_t, HelperType &, llvm::omp::Directive,
+                        llvm::ArrayRef<ClauseType>)
+    -> ConstructDecompositionT<ClauseType, HelperType>;
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(const ObjectTy &object,
+                                                       const ClauseTy *node) {
+  syms[object.id()].insert(node);
+}
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const tomp::ObjectListT<IdTy, ExprTy> &objects, const ClauseTy *node) {
+  for (auto &object : objects)
+    syms[object.id()].insert(node);
+}
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(const TypeTy &item,
+                                                       const ClauseTy *node) {
+  // Nothing to do for types.
+}
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(const ExprTy &item,
+                                                       const ClauseTy *node) {
+  // Nothing to do for expressions.
+}
+
+template <typename C, typename H>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const tomp::clause::MapT<TypeTy, IdTy, ExprTy> &item,
+    const ClauseTy *node) {
+  auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(item.t);
+  addClauseSymsToMap(objects, node);
+  for (auto &object : objects) {
+    if (auto base = helper.getBaseObject(object))
+      mapBases.insert(base->id());
+  }
+}
+
+template <typename C, typename H>
+template <typename U>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const std::optional<U> &item, const ClauseTy *node) {
+  if (item)
+    addClauseSymsToMap(*item, node);
+}
+
+template <typename C, typename H>
+template <typename U>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const tomp::ListT<U> &item, const ClauseTy *node) {
+  for (auto &s : item)
+    addClauseSymsToMap(s, node);
+}
+
+template <typename C, typename H>
+template <typename... U, size_t... Is>
+void ConstructDecompositionT<C, H>::addClauseSymsToMap(
+    const std::tuple<U...> &item, const ClauseTy *node,
+    std::index_sequence<Is...>) {
+  (void)node; // Silence strange warning from GCC.
+  (addClauseSymsToMap(std::get<Is>(item), node), ...);
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<std::is_enum_v<llvm::remove_cvref_t<U>>, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  // Nothing to do for enums.
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::EmptyTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  // Nothing to do for an empty class.
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::IncompleteTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  // Nothing to do for an incomplete class (they're empty).
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::WrapperTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  addClauseSymsToMap(item.v, node);
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::TupleTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  constexpr size_t tuple_size =
+      std::tuple_size_v<llvm::remove_cvref_t<decltype(item.t)>>;
+  addClauseSymsToMap(item.t, node, std::make_index_sequence<tuple_size>{});
+}
+
+template <typename C, typename H>
+template <typename U>
+std::enable_if_t<llvm::remove_cvref_t<U>::UnionTrait::value, void>
+ConstructDecompositionT<C, H>::addClauseSymsToMap(U &&item,
+                                                  const ClauseTy *node) {
+  std::visit([&](auto &&s) { addClauseSymsToMap(s, node); }, item.u);
+}
+
+// Apply a clause to the only directive that allows it. If there are no
+// directives that allow it, or if there is more than one, do not apply
+// anything and return false, otherwise return true.
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyToUnique(const ClauseTy *node) {
+  auto unique = detail::find_unique(leafs, [=](const auto &dirInfo) {
+    return llvm::omp::isAllowedClauseForDirective(dirInfo.id, node->id,
+                                                  version);
+  });
+
+  if (unique != leafs.end()) {
+    unique->clauses.push_back(node);
+    return true;
+  }
+  return false;
+}
+
+// Apply a clause to the first directive in given range that allows it.
+// If such a directive does not exist, return false, otherwise return true.
+template <typename C, typename H>
+template <typename Iterator>
+bool ConstructDecompositionT<C, H>::applyToFirst(
+    const ClauseTy *node, llvm::iterator_range<Iterator> range) {
+  if (range.empty())
+    return false;
+
+  for (auto &dwc : range) {
+    if (!llvm::omp::isAllowedClauseForDirective(dwc.id, node->id, version))
+      continue;
+    dwc.clauses.push_back(node);
+    return true;
+  }
+  return false;
+}
+
+// Apply a clause to the innermost directive that allows it. If such a
+// directive does not exist, return false, otherwise return true.
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyToInnermost(const ClauseTy *node) {
+  return applyToFirst(node, llvm::reverse(leafs));
+}
+
+// Apply a clause to the outermost directive that allows it. If such a
+// directive does not exist, return false, otherwise return true.
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyToOutermost(const ClauseTy *node) {
+  return applyToFirst(node, llvm::iterator_range(leafs));
+}
+
+template <typename C, typename H>
+template <typename Predicate>
+bool ConstructDecompositionT<C, H>::applyIf(const ClauseTy *node,
+                                            Predicate shouldApply) {
+  bool applied = false;
+  for (auto &dwc : leafs) {
+    if (!llvm::omp::isAllowedClauseForDirective(dwc.id, node->id, version))
+      continue;
+    if (!shouldApply(dwc))
+      continue;
+    dwc.clauses.push_back(node);
+    applied = true;
+  }
+
+  return applied;
+}
+
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyToAll(const ClauseTy *node) {
+  return applyIf(node, [](auto) { return true; });
+}
+
+template <typename C, typename H>
+template <typename Clause>
+bool ConstructDecompositionT<C, H>::applyClause(Clause &&clause,
+                                                const ClauseTy *node) {
+  // The default behavior is to find the unique directive to which the
+  // given clause may be applied. If there are no such directives, or
+  // if there are multiple ones, flag an error.
+  // From "OpenMP Application Programming Interface", Version 5.2:
+  // S Some clauses are permitted only on a single leaf construct of the
+  // S combined or composite construct, in which case the effect is as if
+  // S the clause is applied to that specific construct. (p339, 31-33)
+  if (applyToUnique(node))
+    return true;
+
+  return false;
+}
+
+// COLLAPSE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::CollapseT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply COLLAPSE to the innermost directive. If it's not one that
+  // allows it flag an error.
+  if (!leafs.empty()) {
+    auto &last = leafs.back();
+
+    if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) {
+      last.clauses.push_back(node);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// PRIVATE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::PrivateT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  return applyToInnermost(node);
+}
+
+// FIRSTPRIVATE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  bool applied = false;
+
+  // S Section 17.2
+  // S The effect of the firstprivate clause is as if it is applied to one
+  // S or more leaf constructs as follows:
+
+  // S - To the distribute construct if it is among the constituent constructs;
+  // S - To the teams construct if it is among the constituent constructs and
+  // S   the distribute construct is not;
+  auto hasDistribute = findDirective(llvm::omp::OMPD_distribute);
+  auto hasTeams = findDirective(llvm::omp::OMPD_teams);
+  if (hasDistribute != nullptr) {
+    hasDistribute->clauses.push_back(node);
+    applied = true;
+    // S If the teams construct is among the constituent constructs and the
+    // S effect is not as if the firstprivate clause is applied to it by the
+    // S above rules, then the effect is as if the shared clause with the
+    // S same list item is applied to the teams construct.
+    if (hasTeams != nullptr) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/clause.v});
+      hasTeams->clauses.push_back(shared);
+    }
+  } else if (hasTeams != nullptr) {
+    hasTeams->clauses.push_back(node);
+    applied = true;
+  }
+
+  // S - To a worksharing construct that accepts the clause if one is among
+  // S   the constituent constructs;
+  auto findWorksharing = [&]() {
+    auto worksharing = getWorksharing();
+    for (auto &dwc : leafs) {
+      auto found = llvm::find(worksharing, dwc.id);
+      if (found != std::end(worksharing))
+        return &dwc;
+    }
+    return static_cast<typename decltype(leafs)::value_type *>(nullptr);
+  };
+
+  auto hasWorksharing = findWorksharing();
+  if (hasWorksharing != nullptr) {
+    hasWorksharing->clauses.push_back(node);
+    applied = true;
+  }
+
+  // S - To the taskloop construct if it is among the constituent constructs;
+  auto hasTaskloop = findDirective(llvm::omp::OMPD_taskloop);
+  if (hasTaskloop != nullptr) {
+    hasTaskloop->clauses.push_back(node);
+    applied = true;
+  }
+
+  // S - To the parallel construct if it is among the constituent constructs
+  // S   and neither a taskloop construct nor a worksharing construct that
+  // S   accepts the clause is among them;
+  auto hasParallel = findDirective(llvm::omp::OMPD_parallel);
+  if (hasParallel != nullptr) {
+    if (hasTaskloop == nullptr && hasWorksharing == nullptr) {
+      hasParallel->clauses.push_back(node);
+      applied = true;
+    } else {
+      // S If the parallel construct is among the constituent constructs and
+      // S the effect is not as if the firstprivate clause is applied to it by
+      // S the above rules, then the effect is as if the shared clause with
+      // S the same list item is applied to the parallel construct.
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/clause.v});
+      hasParallel->clauses.push_back(shared);
+    }
+  }
+
+  // S - To the target construct if it is among the constituent constructs
+  // S   and the same list item neither appears in a lastprivate clause nor
+  // S   is the base variable or base pointer of a list item that appears in
+  // S   a map clause.
+  auto inLastprivate = [&](const ObjectTy &object) {
+    if (ClauseSet *set = findClausesWith(object)) {
+      return llvm::find_if(*set, [](const ClauseTy *c) {
+               return c->id == llvm::omp::Clause::OMPC_lastprivate;
+             }) != set->end();
+    }
+    return false;
+  };
+
+  auto hasTarget = findDirective(llvm::omp::OMPD_target);
+  if (hasTarget != nullptr) {
+    tomp::ObjectListT<IdTy, ExprTy> objects;
+    llvm::copy_if(
+        clause.v, std::back_inserter(objects), [&](const ObjectTy &object) {
+          return !inLastprivate(object) && !mapBases.contains(object.id());
+        });
+    if (!objects.empty()) {
+      auto *firstp = makeClause(
+          llvm::omp::Clause::OMPC_firstprivate,
+          tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy>{/*List=*/objects});
+      hasTarget->clauses.push_back(firstp);
+      applied = true;
+    }
+  }
+
+  return applied;
+}
+
+// LASTPRIVATE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  bool applied = false;
+
+  // S The effect of the lastprivate clause is as if it is applied to all leaf
+  // S constructs that permit the clause.
+  applied = applyToAll(node);
+  if (!applied)
+    return false;
+
+  auto inFirstprivate = [&](const ObjectTy &object) {
+    if (ClauseSet *set = findClausesWith(object)) {
+      return llvm::find_if(*set, [](const ClauseTy *c) {
+               return c->id == llvm::omp::Clause::OMPC_firstprivate;
+             }) != set->end();
+    }
+    return false;
+  };
+
+  auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(clause.t);
+
+  // Prepare list of objects that could end up in a SHARED clause.
+  tomp::ObjectListT<IdTy, ExprTy> sharedObjects;
+  llvm::copy_if(
+      objects, std::back_inserter(sharedObjects),
+      [&](const ObjectTy &object) { return !inFirstprivate(object); });
+
+  if (!sharedObjects.empty()) {
+    // S If the parallel construct is among the constituent constructs and the
+    // S list item is not also specified in the firstprivate clause, then the
+    // S effect of the lastprivate clause is as if the shared clause with the
+    // S same list item is applied to the parallel construct.
+    if (auto hasParallel = findDirective(llvm::omp::OMPD_parallel)) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
+      hasParallel->clauses.push_back(shared);
+      applied = true;
+    }
+
+    // S If the teams construct is among the constituent constructs and the
+    // S list item is not also specified in the firstprivate clause, then the
+    // S effect of the lastprivate clause is as if the shared clause with the
+    // S same list item is applied to the teams construct.
+    if (auto hasTeams = findDirective(llvm::omp::OMPD_teams)) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
+      hasTeams->clauses.push_back(shared);
+      applied = true;
+    }
+  }
+
+  // S If the target construct is among the constituent constructs and the
+  // S list item is not the base variable or base pointer of a list item that
+  // S appears in a map clause, the effect of the lastprivate clause is as if
+  // S the same list item appears in a map clause with a map-type of tofrom.
+  if (auto hasTarget = findDirective(llvm::omp::OMPD_target)) {
+    tomp::ObjectListT<IdTy, ExprTy> tofrom;
+    llvm::copy_if(objects, std::back_inserter(tofrom),
+                  [&](const ObjectTy &object) {
+                    return !mapBases.contains(object.id());
+                  });
+
+    if (!tofrom.empty()) {
+      using MapType =
+          typename tomp::clause::MapT<TypeTy, IdTy, ExprTy>::MapType;
+      auto *map =
+          makeClause(llvm::omp::Clause::OMPC_map,
+                     tomp::clause::MapT<TypeTy, IdTy, ExprTy>{
+                         {/*MapType=*/MapType::Tofrom,
+                          /*MapTypeModifier=*/std::nullopt,
+                          /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt,
+                          /*LocatorList=*/std::move(tofrom)}});
+      hasTarget->clauses.push_back(map);
+      applied = true;
+    }
+  }
+
+  return applied;
+}
+
+// SHARED
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::SharedT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply SHARED to all leafs that allow it.
+  return applyToAll(node);
+}
+
+// DEFAULT
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::DefaultT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply DEFAULT to all leafs that allow it.
+  return applyToAll(node);
+}
+
+// THREAD_LIMIT
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::ThreadLimitT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply THREAD_LIMIT to all leafs that allow it.
+  return applyToAll(node);
+}
+
+// ORDER
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::OrderT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply ORDER to all leafs that allow it.
+  return applyToAll(node);
+}
+
+// ALLOCATE
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::AllocateT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // This one needs to be applied at the end, once we know which clauses are
+  // assigned to which leaf constructs.
+
+  // S The effect of the allocate clause is as if it is applied to all leaf
+  // S constructs that permit the clause and to which a data-sharing attribute
+  // S clause that may create a private copy of the same list item is applied.
+
+  auto canMakePrivateCopy = [](llvm::omp::Clause id) {
+    switch (id) {
+    case llvm::omp::Clause::OMPC_firstprivate:
+    case llvm::omp::Clause::OMPC_lastprivate:
+    case llvm::omp::Clause::OMPC_private:
+      return true;
+    default:
+      return false;
+    }
+  };
+
+  bool applied = applyIf(node, [&](const auto &dwc) {
+    return llvm::any_of(dwc.clauses, [&](const ClauseTy *n) {
+      return canMakePrivateCopy(n->id);
+    });
+  });
+
+  return applied;
+}
+
+// REDUCTION
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::ReductionT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // S The effect of the reduction clause is as if it is applied to all leaf
+  // S constructs that permit the clause, except for the following constructs:
+  // S - The parallel construct, when combined with the sections, worksharing-
+  // S   loop, loop, or taskloop construct; and
+  // S - The teams construct, when combined with the loop construct.
+  bool applyToParallel = true, applyToTeams = true;
+
+  auto hasParallel = findDirective(llvm::omp::Directive::OMPD_parallel);
+  if (hasParallel) {
+    auto exclusions = llvm::concat<const llvm::omp::Directive>(
+        getWorksharingLoop(), llvm::ArrayRef{
+                                  llvm::omp::Directive::OMPD_loop,
+                                  llvm::omp::Directive::OMPD_sections,
+                                  llvm::omp::Directive::OMPD_taskloop,
+                              });
+    auto present = [&](llvm::omp::Directive id) {
+      return findDirective(id) != nullptr;
+    };
+
+    if (llvm::any_of(exclusions, present))
+      applyToParallel = false;
+  }
+
+  auto hasTeams = findDirective(llvm::omp::Directive::OMPD_teams);
+  if (hasTeams) {
+    // The only exclusion is OMPD_loop.
+    if (findDirective(llvm::omp::Directive::OMPD_loop))
+      applyToTeams = false;
+  }
+
+  auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(clause.t);
+
+  tomp::ObjectListT<IdTy, ExprTy> sharedObjects;
+  llvm::transform(objects, std::back_inserter(sharedObjects),
+                  [&](const ObjectTy &object) {
+                    auto maybeBase = helper.getBaseObject(object);
+                    return maybeBase ? *maybeBase : object;
+                  });
+
+  // S For the parallel and teams constructs above, the effect of the
+  // S reduction clause instead is as if each list item or, for any list
+  // S item that is an array item, its corresponding base array or base
+  // S pointer appears in a shared clause for the construct.
+  if (!sharedObjects.empty()) {
+    if (hasParallel && !applyToParallel) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
+      hasParallel->clauses.push_back(shared);
+    }
+    if (hasTeams && !applyToTeams) {
+      auto *shared = makeClause(
+          llvm::omp::Clause::OMPC_shared,
+          tomp::clause::SharedT<TypeTy, IdTy, ExprTy>{/*List=*/sharedObjects});
+      hasTeams->clauses.push_back(shared);
+    }
+  }
+
+  // TODO(not implemented in parser yet): Apply the following.
+  // S If the task reduction-modifier is specified, the effect is as if
+  // S it only modifies the behavior of the reduction clause on the innermost
+  // S leaf construct that accepts the modifier (see Section 5.5.8). If the
+  // S inscan reduction-modifier is specified, the effect is as if it modifies
+  // S the behavior of the reduction clause on all constructs of the combined
+  // S construct to which the clause is applied and that accept the modifier.
+
+  bool applied = applyIf(node, [&](auto &dwc) {
+    if (!applyToParallel && &dwc == hasParallel)
+      return false;
+    if (!applyToTeams && &dwc == hasTeams)
+      return false;
+    return true;
+  });
+
+  // S If a list item in a reduction clause on a combined target construct
+  // S does not have the same base variable or base pointer as a list item
+  // S in a map clause on the construct, then the effect is as if the list
+  // S item in the reduction clause appears as a list item in a map clause
+  // S with a map-type of tofrom.
+  auto hasTarget = findDirective(llvm::omp::Directive::OMPD_target);
+  if (hasTarget && leafs.size() > 1) {
+    tomp::ObjectListT<IdTy, ExprTy> tofrom;
+    llvm::copy_if(objects, std::back_inserter(tofrom),
+                  [&](const ObjectTy &object) {
+                    if (auto maybeBase = helper.getBaseObject(object))
+                      return !mapBases.contains(maybeBase->id());
+                    return !mapBases.contains(object.id()); // XXX is this ok?
+                  });
+    if (!tofrom.empty()) {
+      using MapType =
+          typename tomp::clause::MapT<TypeTy, IdTy, ExprTy>::MapType;
+      auto *map = makeClause(
+          llvm::omp::Clause::OMPC_map,
+          tomp::clause::MapT<TypeTy, IdTy, ExprTy>{
+              {/*MapType=*/MapType::Tofrom, /*MapTypeModifier=*/std::nullopt,
+               /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt,
+               /*LocatorList=*/std::move(tofrom)}});
+
+      hasTarget->clauses.push_back(map);
+      applied = true;
+    }
+  }
+
+  return applied;
+}
+
+// IF
+// Without a directive-name-modifier the clause goes through applyToAll. With
+// one, it is attached only to the leaf that findDirective returns for the
+// named directive; when there is no such leaf the function returns false.
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::IfT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  using IfTy = tomp::clause::IfT<TypeTy, IdTy, ExprTy>;
+  using NameModifier = typename IfTy::DirectiveNameModifier;
+  const auto &named = std::get<std::optional<NameModifier>>(clause.t);
+  if (!named)
+    return applyToAll(node);
+
+  const llvm::omp::Directive wanted = *named;
+  auto *leaf = findDirective(wanted);
+  if (!leaf)
+    return false;
+  leaf->clauses.push_back(node);
+  return true;
+}
+
+// LINEAR
+// Applies the clause to the innermost leaf. If the construct has a simd leaf,
+// also appends the FIRSTPRIVATE/LASTPRIVATE clauses it implies to `nodes`.
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::LinearT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // S The effect of the linear clause is as if it is applied to the innermost
+  // S leaf construct.
+  if (!applyToInnermost(node))
+    return false;
+
+  // The rest is about SIMD.
+  if (!findDirective(llvm::omp::OMPD_simd))
+    return true;
+
+  // S Additionally, if the list item is not the iteration variable of a
+  // S simd or worksharing-loop SIMD construct, the effect on the outer leaf
+  // S constructs is as if the list item was specified in firstprivate and
+  // S lastprivate clauses on the combined or composite construct, [...]
+  //
+  // S If a list item of the linear clause is the iteration variable of a
+  // S simd or worksharing-loop SIMD construct and it is not declared in
+  // S the construct, the effect on the outer leaf constructs is as if the
+  // S list item was specified in a lastprivate clause on the combined or
+  // S composite construct [...]
+
+  // It's not clear how an object can be listed in a clause AND be the iteration
+  // variable of a construct in which it is declared: an object declared in the
+  // construct is declared after the clause that lists it.
+
+  std::optional<ObjectTy> iterVar = helper.getLoopIterVar();
+  const auto &objects = std::get<tomp::ObjectListT<IdTy, ExprTy>>(clause.t);
+  // Objects for the implied FIRSTPRIVATE and LASTPRIVATE clauses.
+  tomp::ObjectListT<IdTy, ExprTy> first, last;
+
+  for (const ObjectTy &object : objects) {
+    last.push_back(object);
+    // With no known iteration variable, no item is exempt from FIRSTPRIVATE.
+    if (!iterVar || object.id() != iterVar->id())
+      first.push_back(object);
+  }
+
+  if (!first.empty()) {
+    auto *firstp = makeClause(
+        llvm::omp::Clause::OMPC_firstprivate,
+        tomp::clause::FirstprivateT<TypeTy, IdTy, ExprTy>{/*List=*/first});
+    nodes.push_back(firstp); // Appending to the main clause list.
+  }
+  if (!last.empty()) {
+    auto *lastp =
+        makeClause(llvm::omp::Clause::OMPC_lastprivate,
+                   tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy>{
+                       {/*LastprivateModifier=*/std::nullopt, /*List=*/last}});
+    nodes.push_back(lastp); // Appending to the main clause list.
+  }
+  return true;
+}
+
+// NOWAIT -- handled entirely by applyToOutermost; `clause` itself is unused.
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::NowaitT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  return applyToOutermost(node);
+}
+
+// Fills `leafs` from `construct`, then applies the clauses in three phases:
+// LINEAR first, then all but ALLOCATE, then ALLOCATE. Once a clause fails
+// to apply, the remaining ones are skipped and the result is false.
+template <typename C, typename H> bool ConstructDecompositionT<C, H>::split() {
+  using LinearTy = tomp::clause::LinearT<TypeTy, IdTy, ExprTy>;
+  constexpr llvm::omp::Clause linearId = llvm::omp::Clause::OMPC_linear;
+  constexpr llvm::omp::Clause allocateId = llvm::omp::Clause::OMPC_allocate;
+
+  // One entry per directive from getLeafConstructsOrSelf, with no clauses yet.
+  auto &&leafIds = llvm::omp::getLeafConstructsOrSelf(construct);
+  for (llvm::omp::Directive dirId : leafIds)
+    leafs.push_back(LeafReprInternal{dirId, /*clauses=*/{}});
+
+  for (const ClauseTy *item : nodes)
+    addClauseSymsToMap(*item, item);
+
+  // Applies one clause through the overload matching its variant alternative.
+  auto applyOne = [&](const ClauseTy *item) {
+    return std::visit([&](auto &&s) { return applyClause(s, item); },
+                      item->u);
+  };
+
+  // Becomes false at the first clause that fails; later clauses are skipped.
+  bool ok = true;
+
+  // Phase 1: LINEAR. Applying it can append implied FIRSTPRIVATE and
+  // LASTPRIVATE clauses to `nodes`, so gather the LINEAR clauses into a
+  // separate list before applying any of them.
+  llvm::SmallVector<const ClauseTy *> linearItems;
+  for (const ClauseTy *item : nodes) {
+    if (item->id == linearId)
+      linearItems.push_back(item);
+  }
+  for (const ClauseTy *item : linearItems) {
+    if (ok)
+      ok = applyClause(std::get<LinearTy>(item->u), item);
+  }
+
+  // Phase 2: every clause except LINEAR (handled above) and ALLOCATE
+  // (handled below).
+  for (const ClauseTy *item : nodes) {
+    if (item->id == linearId || item->id == allocateId)
+      continue;
+    if (ok)
+      ok = applyOne(item);
+  }
+
+  // Phase 3: ALLOCATE. It goes last because it needs to see which
+  // directives have data-privatizing clauses.
+  for (const ClauseTy *item : nodes) {
+    if (item->id != allocateId)
+      continue;
+    if (ok)
+      ok = applyOne(item);
+  }
+
+  return ok;
+}
+
+} // namespace tomp
+
+#endif // LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
>From 60201bbf05fd1cbd63c0073cf399dcb7cab1ff99 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Thu, 25 Apr 2024 13:09:46 -0500
Subject: [PATCH 2/2] Add LLVM license header, fix include guard
---
 .../Frontend/OpenMP/ConstructDecompositionT.h | 23 ++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
index f8a18a1bed7ae6..f2a3c0d327fd46 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -1,5 +1,22 @@
-#ifndef LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
-#define LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
+//===- ConstructDecompositionT.h -- Decomposing compound constructs -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Given a compound construct with a set of clauses, generate the list of
+// constituent leaf constructs, each with a list of clauses that apply to it.
+//
+// Note: Clauses that are not originally present, but that are implied by the
+// OpenMP spec are materialized, and are present in the output.
+//
+// Note: Composite constructs will also be broken up into leaf constructs.
+// If composite constructs require processing as a whole, the lists of clauses
+// for each leaf constituent should be concatenated.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_FRONTEND_OPENMP_CONSTRUCTDECOMPOSITIONT_H
+#define LLVM_FRONTEND_OPENMP_CONSTRUCTDECOMPOSITIONT_H
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -982,4 +999,4 @@ template <typename C, typename H> bool ConstructDecompositionT<C, H>::split() {
 
 } // namespace tomp
 
-#endif // LLVM_FRONTEND_OPENMP_COMPOUNDSPLITTERT_H
+#endif // LLVM_FRONTEND_OPENMP_CONSTRUCTDECOMPOSITIONT_H
    
    
More information about the llvm-branch-commits
mailing list