[flang-commits] [flang] [llvm] [mlir] [flang][OpenMP] Enable tiling (PR #143715)

Jan Leyonberg via flang-commits flang-commits at lists.llvm.org
Mon Aug 11 04:24:10 PDT 2025


https://github.com/jsjodin updated https://github.com/llvm/llvm-project/pull/143715

>From 77e5be42cec505b6e1884acd392094fa62fe4ebc Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Fri, 14 Mar 2025 11:06:51 -0400
Subject: [PATCH 01/25] Initial implementation of tiling.

---
 flang/include/flang/Lower/OpenMP.h            |   1 -
 flang/lib/Lower/OpenMP/OpenMP.cpp             |  70 +++++--
 flang/lib/Lower/OpenMP/Utils.cpp              |  33 +++-
 flang/lib/Semantics/canonicalize-omp.cpp      |  44 ++++-
 flang/lib/Semantics/resolve-directives.cpp    | 176 ++++++++++++++----
 .../Frontend/OpenMP/ConstructDecompositionT.h |  24 +++
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |   9 +
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  24 +++
 llvm/lib/Transforms/Utils/CodeExtractor.cpp   |   7 +-
 .../mlir/Dialect/OpenMP/OpenMPClauses.td      |  32 ++++
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |   4 +-
 .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp    |   3 +-
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |  52 +++++-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  65 +++++--
 14 files changed, 458 insertions(+), 86 deletions(-)

diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h
index 6e150ef4e8e82..4d77fff912f4f 100644
--- a/flang/include/flang/Lower/OpenMP.h
+++ b/flang/include/flang/Lower/OpenMP.h
@@ -79,7 +79,6 @@ void genOpenMPDeclarativeConstruct(AbstractConverter &,
 void genOpenMPSymbolProperties(AbstractConverter &converter,
                                const pft::Variable &var);
 
-int64_t getCollapseValue(const Fortran::parser::OmpClauseList &clauseList);
 void genThreadprivateOp(AbstractConverter &, const pft::Variable &);
 void genDeclareTargetIntGlobal(AbstractConverter &, const pft::Variable &);
 bool isOpenMPTargetConstruct(const parser::OpenMPConstruct &);
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index db6a0e2f39601..7b352da3aa6b7 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -403,6 +403,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
       return;
 
     const parser::OmpClauseList *beginClauseList = nullptr;
+    const parser::OmpClauseList *middleClauseList = nullptr;
     const parser::OmpClauseList *endClauseList = nullptr;
     common::visit(
         common::visitors{
@@ -417,6 +418,22 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
               beginClauseList =
                   &std::get<parser::OmpClauseList>(beginDirective.t);
 
+              // FIXME(JAN): For now we check if there is an inner
+              // OpenMPLoopConstruct, and extract the size clause from there
+              const auto &innerOptional = std::get<std::optional<
+                  common::Indirection<parser::OpenMPLoopConstruct>>>(
+                  ompConstruct.t);
+              if (innerOptional.has_value()) {
+                const auto &innerLoopDirective = innerOptional.value().value();
+                const auto &innerBegin =
+                  std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
+                const auto &innerDirective =
+                  std::get<parser::OmpLoopDirective>(innerBegin.t);
+                if (innerDirective.v == llvm::omp::Directive::OMPD_tile) {
+                  middleClauseList =
+                      &std::get<parser::OmpClauseList>(innerBegin.t);
+                }
+              }
               if (auto &endDirective =
                       std::get<std::optional<parser::OmpEndLoopDirective>>(
                           ompConstruct.t)) {
@@ -430,6 +447,9 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
     assert(beginClauseList && "expected begin directive");
     clauses.append(makeClauses(*beginClauseList, semaCtx));
 
+    if (middleClauseList)
+      clauses.append(makeClauses(*middleClauseList, semaCtx));
+
     if (endClauseList)
       clauses.append(makeClauses(*endClauseList, semaCtx));
   };
@@ -910,6 +930,7 @@ static void genLoopVars(
     storeOp =
         createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
   }
+
   firOpBuilder.setInsertionPointAfter(storeOp);
 }
 
@@ -1660,6 +1681,23 @@ genLoopNestClauses(lower::AbstractConverter &converter,
     cp.processCollapse(loc, eval, clauseOps, iv);
 
   clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr();
+
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  for (auto &clause : clauses) {
+    if (clause.id == llvm::omp::Clause::OMPC_collapse) {
+      const auto &collapse = std::get<clause::Collapse>(clause.u);
+      int64_t collapseValue = evaluate::ToInt64(collapse.v).value();
+      clauseOps.numCollapse = firOpBuilder.getI64IntegerAttr(collapseValue);
+    } else if (clause.id == llvm::omp::Clause::OMPC_sizes) {
+      const auto &sizes = std::get<clause::Sizes>(clause.u);
+      llvm::SmallVector<int64_t> sizeValues;
+      for (auto &size : sizes.v) {
+        int64_t sizeValue = evaluate::ToInt64(size).value();
+        sizeValues.push_back(sizeValue);
+      }
+      clauseOps.tileSizes = sizeValues;
+    }
+  }
 }
 
 static void genLoopClauses(
@@ -2036,9 +2074,9 @@ static mlir::omp::LoopNestOp genLoopNestOp(
     return llvm::SmallVector<const semantics::Symbol *>(iv);
   };
 
-  auto *nestedEval =
-      getCollapsedLoopEval(eval, getCollapseValue(item->clauses));
-
+  uint64_t nestValue = getCollapseValue(item->clauses);
+  nestValue = nestValue < iv.size() ? iv.size() : nestValue;
+  auto *nestedEval = getCollapsedLoopEval(eval, nestValue);
   return genOpWithBody<mlir::omp::LoopNestOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval,
                         directive)
@@ -3863,6 +3901,20 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
       std::get<parser::OmpBeginLoopDirective>(loopConstruct.t);
   List<Clause> clauses = makeClauses(
       std::get<parser::OmpClauseList>(beginLoopDirective.t), semaCtx);
+
+  const auto &innerOptional = std::get<std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(loopConstruct.t);
+  if (innerOptional.has_value()) {
+    const auto &innerLoopDirective = innerOptional.value().value();
+    const auto &innerBegin =
+      std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
+    const auto &innerDirective =
+      std::get<parser::OmpLoopDirective>(innerBegin.t);
+    if (innerDirective.v == llvm::omp::Directive::OMPD_tile) {
+      clauses.append(
+          makeClauses(std::get<parser::OmpClauseList>(innerBegin.t), semaCtx));
+    }
+  }
+
   if (auto &endLoopDirective =
           std::get<std::optional<parser::OmpEndLoopDirective>>(
               loopConstruct.t)) {
@@ -3994,18 +4046,6 @@ void Fortran::lower::genOpenMPSymbolProperties(
     lower::genDeclareTargetIntGlobal(converter, var);
 }
 
-int64_t
-Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) {
-  for (const parser::OmpClause &clause : clauseList.v) {
-    if (const auto &collapseClause =
-            std::get_if<parser::OmpClause::Collapse>(&clause.u)) {
-      const auto *expr = semantics::GetExpr(collapseClause->v);
-      return evaluate::ToInt64(*expr).value();
-    }
-  }
-  return 1;
-}
-
 void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter,
                                         const lower::pft::Variable &var) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 13fda978c5369..7e55726d3b721 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -38,14 +38,22 @@ namespace lower {
 namespace omp {
 
 int64_t getCollapseValue(const List<Clause> &clauses) {
-  auto iter = llvm::find_if(clauses, [](const Clause &clause) {
-    return clause.id == llvm::omp::Clause::OMPC_collapse;
-  });
-  if (iter != clauses.end()) {
-    const auto &collapse = std::get<clause::Collapse>(iter->u);
-    return evaluate::ToInt64(collapse.v).value();
+  int64_t collapseValue = 1;
+  int64_t numTileSizes = 0;
+  for (auto &clause : clauses) {
+    if (clause.id == llvm::omp::Clause::OMPC_collapse) {
+      const auto &collapse = std::get<clause::Collapse>(clause.u);
+      collapseValue = evaluate::ToInt64(collapse.v).value();
+    } else if (clause.id == llvm::omp::Clause::OMPC_sizes) {
+      const auto &sizes = std::get<clause::Sizes>(clause.u);
+      numTileSizes = sizes.v.size();
+    }
   }
-  return 1;
+
+  collapseValue = collapseValue - numTileSizes;
+  int64_t result =
+    collapseValue > numTileSizes ? collapseValue : numTileSizes;
+  return result;
 }
 
 void genObjectList(const ObjectList &objects,
@@ -612,6 +620,7 @@ bool collectLoopRelatedInfo(
     lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses,
     mlir::omp::LoopRelatedClauseOps &result,
     llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
+
   bool found = false;
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
@@ -627,7 +636,16 @@ bool collectLoopRelatedInfo(
     collapseValue = evaluate::ToInt64(clause->v).value();
     found = true;
   }
+  std::int64_t sizesLengthValue = 0l;
+  if (auto *clause =
+          ClauseFinder::findUniqueClause<omp::clause::Sizes>(clauses)) {
+    sizesLengthValue = clause->v.size();
+    found = true;
+  }
 
+  collapseValue = collapseValue - sizesLengthValue;
+  collapseValue =
+      collapseValue < sizesLengthValue ? sizesLengthValue : collapseValue;
   std::size_t loopVarTypeSize = 0;
   do {
     lower::pft::Evaluation *doLoop =
@@ -660,7 +678,6 @@ bool collectLoopRelatedInfo(
   } while (collapseValue > 0);
 
   convertLoopBounds(converter, currentLocation, result, loopVarTypeSize);
-
   return found;
 }
 
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index 9722eca19447d..fb0bb0f923574 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -11,6 +11,7 @@
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/semantics.h"
 
+# include <stack>
 // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP
 // Constructs more structured which provide explicit scopes for later
 // structural checks and semantic analysis.
@@ -117,15 +118,17 @@ class CanonicalizationOfOmp {
     // in the same iteration
     //
     // Original:
-    //   ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
-    //     OmpBeginLoopDirective
+    //   ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t->
+    //     OmpBeginLoopDirective t-> OmpLoopDirective
+    //   [ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct u->
+    ///    OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile]
     //   ExecutableConstruct -> DoConstruct
+    //   [ExecutableConstruct -> OmpEndLoopDirective]
     //   ExecutableConstruct -> OmpEndLoopDirective (if available)
     //
     // After rewriting:
-    //   ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
-    //     OmpBeginLoopDirective
-    //     DoConstruct
+    //   ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t->
+    //     OmpBeginLoopDirective t -> OmpLoopDirective -> DoConstruct
     //     OmpEndLoopDirective (if available)
     parser::Block::iterator nextIt;
     auto &beginDir{std::get<parser::OmpBeginLoopDirective>(x.t)};
@@ -147,20 +150,41 @@ class CanonicalizationOfOmp {
       if (GetConstructIf<parser::CompilerDirective>(*nextIt))
         continue;
 
+      // Keep track of the loops to handle the end loop directives
+      std::stack<parser::OpenMPLoopConstruct *> loops;
+      loops.push(&x);
+      while (auto *innerConstruct{
+          GetConstructIf<parser::OpenMPConstruct>(*nextIt)}) {
+        if (auto *innerOmpLoop{
+                std::get_if<parser::OpenMPLoopConstruct>(&innerConstruct->u)}) {
+          std::get<
+              std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
+              loops.top()->t) = std::move(*innerOmpLoop);
+          // Retrieveing the address so that DoConstruct or inner loop can be
+          // set later.
+          loops.push(&(std::get<std::optional<
+                           common::Indirection<parser::OpenMPLoopConstruct>>>(
+              loops.top()->t)
+                           .value()
+                           .value()));
+          nextIt = block.erase(nextIt);
+        }
+      }
       if (auto *doCons{GetConstructIf<parser::DoConstruct>(*nextIt)}) {
         if (doCons->GetLoopControl()) {
           // move DoConstruct
           std::get<std::optional<std::variant<parser::DoConstruct,
-              common::Indirection<parser::OpenMPLoopConstruct>>>>(x.t) =
-              std::move(*doCons);
+              common::Indirection<parser::OpenMPLoopConstruct>>>>(
+              loops.top()->t) = std::move(*doCons);
           nextIt = block.erase(nextIt);
           // try to match OmpEndLoopDirective
-          if (nextIt != block.end()) {
+          while (nextIt != block.end() && !loops.empty()) {
             if (auto *endDir{
                     GetConstructIf<parser::OmpEndLoopDirective>(*nextIt)}) {
-              std::get<std::optional<parser::OmpEndLoopDirective>>(x.t) =
-                  std::move(*endDir);
+              std::get<std::optional<parser::OmpEndLoopDirective>>(
+                  loops.top()->t) = std::move(*endDir);
               nextIt = block.erase(nextIt);
+              loops.pop();
             }
           }
         } else {
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 0557b0870de2f..5d3aa96964318 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -816,7 +816,28 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor<llvm::omp::Directive> {
   const parser::OmpClause *GetAssociatedClause() { return associatedClause; }
 
 private:
+  std::int64_t SetAssociatedMaxClause(llvm::SmallVector<std::int64_t> &,
+      llvm::SmallVector<const parser::OmpClause *> &);
+  std::int64_t GetAssociatedLoopLevelFromLoopConstruct(
+      const parser::OpenMPLoopConstruct &);
   std::int64_t GetAssociatedLoopLevelFromClauses(const parser::OmpClauseList &);
+  void CollectAssociatedLoopLevelsFromLoopConstruct(
+      const parser::OpenMPLoopConstruct &, llvm::SmallVector<std::int64_t> &,
+      llvm::SmallVector<const parser::OmpClause *> &);
+  void CollectAssociatedLoopLevelsFromInnerLoopContruct(
+      const parser::OpenMPLoopConstruct &, llvm::SmallVector<std::int64_t> &,
+      llvm::SmallVector<const parser::OmpClause *> &);
+  template <typename T>
+  void CollectAssociatedLoopLevelFromClauseValue(
+      const parser::OmpClause &clause, llvm::SmallVector<std::int64_t> &,
+      llvm::SmallVector<const parser::OmpClause *> &);
+  template <typename T>
+  void CollectAssociatedLoopLevelFromClauseSize(const parser::OmpClause &,
+      llvm::SmallVector<std::int64_t> &,
+      llvm::SmallVector<const parser::OmpClause *> &);
+  void CollectAssociatedLoopLevelsFromClauses(const parser::OmpClauseList &,
+      llvm::SmallVector<std::int64_t> &,
+      llvm::SmallVector<const parser::OmpClause *> &);
 
   Symbol::Flags dataSharingAttributeFlags{Symbol::Flag::OmpShared,
       Symbol::Flag::OmpPrivate, Symbol::Flag::OmpFirstPrivate,
@@ -1818,7 +1839,6 @@ bool OmpAttributeVisitor::Pre(
 bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
   const auto &beginLoopDir{std::get<parser::OmpBeginLoopDirective>(x.t)};
   const auto &beginDir{std::get<parser::OmpLoopDirective>(beginLoopDir.t)};
-  const auto &clauseList{std::get<parser::OmpClauseList>(beginLoopDir.t)};
   switch (beginDir.v) {
   case llvm::omp::Directive::OMPD_distribute:
   case llvm::omp::Directive::OMPD_distribute_parallel_do:
@@ -1869,7 +1889,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
       beginDir.v == llvm::omp::Directive::OMPD_target_loop)
     IssueNonConformanceWarning(beginDir.v, beginDir.source, 52);
   ClearDataSharingAttributeObjects();
-  SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromClauses(clauseList));
+  SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromLoopConstruct(x));
 
   if (beginDir.v == llvm::omp::Directive::OMPD_do) {
     auto &optLoopCons = std::get<std::optional<parser::NestedConstruct>>(x.t);
@@ -1883,7 +1903,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
     }
   }
   PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x);
-  ordCollapseLevel = GetAssociatedLoopLevelFromClauses(clauseList) + 1;
+  ordCollapseLevel = GetAssociatedLoopLevelFromLoopConstruct(x) + 1;
   return true;
 }
 
@@ -1971,44 +1991,124 @@ bool OmpAttributeVisitor::Pre(const parser::DoConstruct &x) {
   return true;
 }
 
+static bool isSizesClause(const parser::OmpClause *clause) {
+  return std::holds_alternative<parser::OmpClause::Sizes>(clause->u);
+}
+
+std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause(
+    llvm::SmallVector<std::int64_t> &levels,
+    llvm::SmallVector<const parser::OmpClause *> &clauses) {
+
+  // Find the tile level to know how much to reduce the level for collapse
+  std::int64_t tileLevel = 0;
+  for (auto [level, clause] : llvm::zip_equal(levels, clauses)) {
+    if (isSizesClause(clause)) {
+      tileLevel = level;
+    }
+  }
+
+  std::int64_t maxLevel = 1;
+  const parser::OmpClause *maxClause = nullptr;
+  for (auto [level, clause] : llvm::zip_equal(levels, clauses)) {
+    if (tileLevel > 0 && tileLevel < level) {
+      context_.Say(clause->source,
+          "The value of the parameter in the COLLAPSE clause must"
+          " not be larger than the number of the number of tiled loops"
+          " because collapse relies on independent loop iterations."_err_en_US);
+      return 1;
+    }
+
+    if (!isSizesClause(clause)) {
+      level = level - tileLevel;
+    }
+
+    if (level > maxLevel) {
+      maxLevel = level;
+      maxClause = clause;
+    }
+  }
+  if (maxClause)
+    SetAssociatedClause(maxClause);
+  return maxLevel;
+}
+
+std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromLoopConstruct(
+    const parser::OpenMPLoopConstruct &x) {
+  llvm::SmallVector<std::int64_t> levels;
+  llvm::SmallVector<const parser::OmpClause *> clauses;
+
+  CollectAssociatedLoopLevelsFromLoopConstruct(x, levels, clauses);
+  return SetAssociatedMaxClause(levels, clauses);
+}
+
 std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses(
     const parser::OmpClauseList &x) {
-  std::int64_t orderedLevel{0};
-  std::int64_t collapseLevel{0};
+  llvm::SmallVector<std::int64_t> levels;
+  llvm::SmallVector<const parser::OmpClause *> clauses;
 
-  const parser::OmpClause *ordClause{nullptr};
-  const parser::OmpClause *collClause{nullptr};
+  CollectAssociatedLoopLevelsFromClauses(x, levels, clauses);
+  return SetAssociatedMaxClause(levels, clauses);
+}
 
-  for (const auto &clause : x.v) {
-    if (const auto *orderedClause{
-            std::get_if<parser::OmpClause::Ordered>(&clause.u)}) {
-      if (const auto v{EvaluateInt64(context_, orderedClause->v)}) {
-        orderedLevel = *v;
-      }
-      ordClause = &clause;
-    }
-    if (const auto *collapseClause{
-            std::get_if<parser::OmpClause::Collapse>(&clause.u)}) {
-      if (const auto v{EvaluateInt64(context_, collapseClause->v)}) {
-        collapseLevel = *v;
-      }
-      collClause = &clause;
+void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromLoopConstruct(
+    const parser::OpenMPLoopConstruct &x,
+    llvm::SmallVector<std::int64_t> &levels,
+    llvm::SmallVector<const parser::OmpClause *> &clauses) {
+  const auto &beginLoopDir{std::get<parser::OmpBeginLoopDirective>(x.t)};
+  const auto &clauseList{std::get<parser::OmpClauseList>(beginLoopDir.t)};
+
+  CollectAssociatedLoopLevelsFromClauses(clauseList, levels, clauses);
+  CollectAssociatedLoopLevelsFromInnerLoopContruct(x, levels, clauses);
+}
+
+void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct(
+    const parser::OpenMPLoopConstruct &x,
+    llvm::SmallVector<std::int64_t> &levels,
+    llvm::SmallVector<const parser::OmpClause *> &clauses) {
+  const auto &innerOptional =
+      std::get<std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
+          x.t);
+  if (innerOptional.has_value()) {
+    CollectAssociatedLoopLevelsFromLoopConstruct(
+        innerOptional.value().value(), levels, clauses);
+  }
+}
+
+template <typename T>
+void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseValue(
+    const parser::OmpClause &clause, llvm::SmallVector<std::int64_t> &levels,
+    llvm::SmallVector<const parser::OmpClause *> &clauses) {
+  if (const auto tclause{std::get_if<T>(&clause.u)}) {
+    std::int64_t level = 0;
+    if (const auto v{EvaluateInt64(context_, tclause->v)}) {
+      level = *v;
     }
+    levels.push_back(level);
+    clauses.push_back(&clause);
   }
+}
 
-  if (orderedLevel && (!collapseLevel || orderedLevel >= collapseLevel)) {
-    SetAssociatedClause(ordClause);
-    return orderedLevel;
-  } else if (!orderedLevel && collapseLevel) {
-    SetAssociatedClause(collClause);
-    return collapseLevel;
-  } else {
-    SetAssociatedClause(nullptr);
+template <typename T>
+void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseSize(
+    const parser::OmpClause &clause, llvm::SmallVector<std::int64_t> &levels,
+    llvm::SmallVector<const parser::OmpClause *> &clauses) {
+  if (const auto tclause{std::get_if<T>(&clause.u)}) {
+    levels.push_back(tclause->v.size());
+    clauses.push_back(&clause);
   }
-  // orderedLevel < collapseLevel is an error handled in structural
-  // checks
+}
 
-  return 1; // default is outermost loop
+void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromClauses(
+    const parser::OmpClauseList &x, llvm::SmallVector<std::int64_t> &levels,
+    llvm::SmallVector<const parser::OmpClause *> &clauses) {
+  for (const auto &clause : x.v) {
+    CollectAssociatedLoopLevelFromClauseValue<parser::OmpClause::Ordered>(
+        clause, levels, clauses);
+    CollectAssociatedLoopLevelFromClauseValue<parser::OmpClause::Collapse>(
+        clause, levels, clauses);
+    CollectAssociatedLoopLevelFromClauseSize<parser::OmpClause::Sizes>(
+        clause, levels, clauses);
+  }
 }
 
 // 2.15.1.1 Data-sharing Attribute Rules - Predetermined
@@ -2040,10 +2140,18 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
   const parser::OmpClause *clause{GetAssociatedClause()};
   bool hasCollapseClause{
       clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false};
+  const parser::OpenMPLoopConstruct *innerMostLoop = &x;
+
+  while (auto &optLoopCons{
+      std::get<std::optional<parser::NestedConstruct>>(x.t)}) {
+    if (const auto &innerLoop{
+            std::get_if < parser::OpenMPLoopConstruct >>> (innerMostLoop->t)}) {
+      innerMostLoop = &innerLoop.value().value();
+    }
+  }
 
-  auto &optLoopCons = std::get<std::optional<parser::NestedConstruct>>(x.t);
   if (optLoopCons.has_value()) {
-    if (const auto &outer{std::get_if<parser::DoConstruct>(&*optLoopCons)}) {
+    if (const auto &outer{std::get_if<parser::DoConstruct>(innerMostLoop->t)}) {
       for (const parser::DoConstruct *loop{&*outer}; loop && level > 0;
           --level) {
         if (loop->IsDoConcurrent()) {
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
index 047baa3a79f5d..83db78667c7f8 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -209,6 +209,8 @@ struct ConstructDecompositionT {
 
   bool applyClause(const tomp::clause::CollapseT<TypeTy, IdTy, ExprTy> &clause,
                    const ClauseTy *);
+  bool applyClause(const tomp::clause::SizesT<TypeTy, IdTy, ExprTy> &clause,
+                   const ClauseTy *);
   bool applyClause(const tomp::clause::PrivateT<TypeTy, IdTy, ExprTy> &clause,
                    const ClauseTy *);
   bool
@@ -482,6 +484,28 @@ bool ConstructDecompositionT<C, H>::applyClause(
 
   return false;
 }
+// FIXME(JAN): Do the correct thing, but for now we'll do the same as collapse
+template <typename C, typename H>
+bool ConstructDecompositionT<C, H>::applyClause(
+    const tomp::clause::SizesT<TypeTy, IdTy, ExprTy> &clause,
+    const ClauseTy *node) {
+  // Apply "sizes" to the innermost directive. If it's not one that
+  // allows it flag an error.
+  if (!leafs.empty()) {
+    auto &last = leafs.back();
+
+    if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) {
+      last.clauses.push_back(node);
+      return true;
+    } else {
+      llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n";
+      last.clauses.push_back(node);
+      return true;
+    }
+  }
+
+  return false;
+}
 
 // PRIVATE
 // [5.2:111:5-7]
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 206ad4a4ef85f..b50dfe06ef437 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2170,6 +2170,9 @@ class OpenMPIRBuilder {
 
     /// Return the function that contains the region to be outlined.
     Function *getFunction() const { return EntryBB->getParent(); }
+
+    /// Dump the info in a somewhat readable way
+    void dump();
   };
 
   /// Collection of regions that need to be outlined during finalization.
@@ -2187,6 +2190,9 @@ class OpenMPIRBuilder {
   /// Add a new region that will be outlined later.
   void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
 
+  /// Dump outline infos
+  void dumpOutlineInfos();
+
   /// An ordered map of auto-generated variables to their unique names.
   /// It stores variables with the following names: 1) ".gomp_critical_user_" +
   /// <critical_section_name> + ".var" for "omp critical" directives; 2)
@@ -3772,6 +3778,9 @@ class CanonicalLoopInfo {
   /// Invalidate this loop. That is, the underlying IR does not fulfill the
   /// requirements of an OpenMP canonical loop anymore.
   LLVM_ABI void invalidate();
+
+  /// Dump the info in a somewhat readable way
+  void dump();
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 3aa4f7ae04c33..f4d9791bb0ad2 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -8725,6 +8725,15 @@ Error OpenMPIRBuilder::emitOffloadingArrays(
   return Error::success();
 }
 
+void OpenMPIRBuilder::dumpOutlineInfos() {
+  errs() << "=== Outline Infos Begin ===\n";
+  for (auto En : enumerate(OutlineInfos)) {
+    errs() << "[" << En.index() << "]: ";
+    En.value().dump();
+  }
+  errs() << "=== Outline Infos End ===\n";
+}
+
 void OpenMPIRBuilder::emitBranch(BasicBlock *Target) {
   BasicBlock *CurBB = Builder.GetInsertBlock();
 
@@ -9633,6 +9642,14 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks(
   }
 }
 
+void OpenMPIRBuilder::OutlineInfo::dump() {
+  errs() << "=== OutilneInfo == "
+         << " EntryBB: " << (EntryBB ? EntryBB->getName() : "n\a")
+         << " ExitBB: " << (ExitBB ? ExitBB->getName() : "n\a")
+         << " OuterAllocaBB: "
+         << (OuterAllocaBB ? OuterAllocaBB->getName() : "n/a") << "\n";
+}
+
 void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr,
                                          uint64_t Size, int32_t Flags,
                                          GlobalValue::LinkageTypes,
@@ -10410,3 +10427,10 @@ void CanonicalLoopInfo::invalidate() {
   Latch = nullptr;
   Exit = nullptr;
 }
+
+void CanonicalLoopInfo::dump() {
+  errs() << "CanonicaLoop == Header: " << (Header ? Header->getName() : "n/a")
+         << " Cond: " << (Cond ? Cond->getName() : "n/a")
+         << " Latch: " << (Latch ? Latch->getName() : "n/a")
+         << " Exit: " << (Exit ? Exit->getName() : "n/a") << "\n";
+}
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 7a9dd37b72205..921e2ff2506fc 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -810,7 +810,11 @@ void CodeExtractor::severSplitPHINodesOfExits() {
 }
 
 void CodeExtractor::splitReturnBlocks() {
-  for (BasicBlock *Block : Blocks)
+  for (BasicBlock *Block : Blocks) {
+    if (!Block->getTerminator()) {
+      errs() << "====== No terminator in block: " << Block->getName()
+             << "======\n";
+    }
     if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
       BasicBlock *New =
           Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret");
@@ -827,6 +831,7 @@ void CodeExtractor::splitReturnBlocks() {
           DT->changeImmediateDominator(I, NewNode);
       }
     }
+  }
 }
 
 Function *CodeExtractor::constructFunctionDeclaration(
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
index 311c57fb4446c..eb836db890738 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
@@ -317,6 +317,38 @@ class OpenMP_DeviceClauseSkip<
 
 def OpenMP_DeviceClause : OpenMP_DeviceClauseSkip<>;
 
+//===----------------------------------------------------------------------===//
+// V5.2: [XX.X] `collapse` clause
+//===----------------------------------------------------------------------===//
+
+class OpenMP_CollapseClauseSkip<
+    bit traits = false, bit arguments = false, bit assemblyFormat = false,
+    bit description = false, bit extraClassDeclaration = false
+  > : OpenMP_Clause<traits, arguments, assemblyFormat, description,
+                    extraClassDeclaration> {
+  let arguments = (ins
+      DefaultValuedOptionalAttr<I64Attr, "1">:$num_collapse
+  );
+}
+
+def OpenMP_CollapseClause : OpenMP_CollapseClauseSkip<>;
+
+//===----------------------------------------------------------------------===//
+// V5.2: [xx.x] `sizes` clause
+//===----------------------------------------------------------------------===//
+
+class OpenMP_TileSizesClauseSkip<
+    bit traits = false, bit arguments = false, bit assemblyFormat = false,
+    bit description = false, bit extraClassDeclaration = false
+  > : OpenMP_Clause<traits, arguments, assemblyFormat, description,
+                    extraClassDeclaration> {
+  let arguments = (ins
+      OptionalAttr<DenseI64ArrayAttr>:$tile_sizes
+  );
+}
+
+def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>;
+
 //===----------------------------------------------------------------------===//
 // V5.2: [11.6.1] `dist_schedule` clause
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index be114ea4fb631..0b13c526b7f03 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -614,7 +614,9 @@ def WorkshareLoopWrapperOp : OpenMP_Op<"workshare.loop_wrapper", traits = [
 def LoopNestOp : OpenMP_Op<"loop_nest", traits = [
     RecursiveMemoryEffects, SameVariadicOperandSize
   ], clauses = [
-    OpenMP_LoopRelatedClause
+    OpenMP_LoopRelatedClause,
+    OpenMP_CollapseClause,
+    OpenMP_TileSizesClause
   ], singleRegion = true> {
   let summary = "rectangular loop nest";
   let description = [{
diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
index 34f372af1e4b5..bec17258d058f 100644
--- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
+++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
@@ -493,7 +493,8 @@ struct ParallelOpLowering : public OpRewritePattern<scf::ParallelOp> {
         // Create loop nest and populate region with contents of scf.parallel.
         auto loopOp = omp::LoopNestOp::create(
             rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(),
-            parallelOp.getUpperBound(), parallelOp.getStep());
+            parallelOp.getUpperBound(), parallelOp.getStep(), false, 1,
+            nullptr);
 
         rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(),
                                     loopOp.getRegion().begin());
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index c1c1767ef90b0..0bef00fed1ba0 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -56,6 +56,11 @@ makeDenseBoolArrayAttr(MLIRContext *ctx, const ArrayRef<bool> boolArray) {
   return boolArray.empty() ? nullptr : DenseBoolArrayAttr::get(ctx, boolArray);
 }
 
+static DenseI64ArrayAttr
+makeDenseI64ArrayAttr(MLIRContext *ctx, const ArrayRef<int64_t> intArray) {
+  return intArray.empty() ? nullptr : DenseI64ArrayAttr::get(ctx, intArray);
+}
+
 namespace {
 struct MemRefPointerLikeModel
     : public PointerLikeType::ExternalModel<MemRefPointerLikeModel,
@@ -2956,10 +2961,10 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) {
   for (auto &iv : ivs)
     iv.type = loopVarType;
 
+  auto ctx = parser.getBuilder().getContext();
   // Parse "inclusive" flag.
   if (succeeded(parser.parseOptionalKeyword("inclusive")))
-    result.addAttribute("loop_inclusive",
-                        UnitAttr::get(parser.getBuilder().getContext()));
+    result.addAttribute("loop_inclusive", UnitAttr::get(ctx));
 
   // Parse step values.
   SmallVector<OpAsmParser::UnresolvedOperand> steps;
@@ -2967,6 +2972,38 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) {
       parser.parseOperandList(steps, ivs.size(), OpAsmParser::Delimiter::Paren))
     return failure();
 
+  // Parse collapse
+  int64_t value = 0;
+  if (!parser.parseOptionalKeyword("collapse") &&
+      (parser.parseLParen() || parser.parseInteger(value) ||
+       parser.parseRParen()))
+    return failure();
+  if (value > 1) {
+    result.addAttribute(
+        "num_collapse",
+        IntegerAttr::get(parser.getBuilder().getI64Type(), value));
+  }
+
+  // Parse tiles
+  SmallVector<int64_t> tiles;
+  auto parseTiles = [&]() -> ParseResult {
+    int64_t tile;
+    if (parser.parseInteger(tile))
+      return failure();
+    tiles.push_back(tile);
+    return success();
+  };
+
+  if (!parser.parseOptionalKeyword("tiles") &&
+      (parser.parseLParen() ||
+      parser.parseCommaSeparatedList(parseTiles) ||
+       parser.parseRParen()))
+    return failure();
+
+  if (tiles.size() > 0) {
+    result.addAttribute("tile_sizes", DenseI64ArrayAttr::get(ctx, tiles));
+  }
+
   // Parse the body.
   Region *region = result.addRegion();
   if (parser.parseRegion(*region, ivs))
@@ -2990,14 +3027,23 @@ void LoopNestOp::print(OpAsmPrinter &p) {
   if (getLoopInclusive())
     p << "inclusive ";
   p << "step (" << getLoopSteps() << ") ";
+  if (int64_t numCollapse = getNumCollapse()) {
+    if (numCollapse > 1)
+      p << "collapse(" << numCollapse << ") ";
+  }
+  if (const auto tiles = getTileSizes()) {
+    p << "tiles(" << tiles.value() << ") ";
+  }
   p.printRegion(region, /*printEntryBlockArgs=*/false);
 }
 
 void LoopNestOp::build(OpBuilder &builder, OperationState &state,
                        const LoopNestOperands &clauses) {
+  MLIRContext *ctx = builder.getContext();
   LoopNestOp::build(builder, state, clauses.loopLowerBounds,
                     clauses.loopUpperBounds, clauses.loopSteps,
-                    clauses.loopInclusive);
+                    clauses.loopInclusive, clauses.numCollapse,
+                    makeDenseI64ArrayAttr(ctx, clauses.tileSizes));
 }
 
 LogicalResult LoopNestOp::verify() {
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 762cc88d9fc3d..5e1453bcb6de5 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2966,10 +2966,9 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
 /// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
-                   LLVM::ModuleTranslation &moduleTranslation) {
+                        LLVM::ModuleTranslation &moduleTranslation) {
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
   auto loopOp = cast<omp::LoopNestOp>(opInst);
-
   // Set up the source location value for OpenMP runtime.
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
 
@@ -3035,18 +3034,60 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
     loopInfos.push_back(*loopResult);
   }
 
-  // Collapse loops. Store the insertion point because LoopInfos may get
-  // invalidated.
+  //  llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP();
   llvm::OpenMPIRBuilder::InsertPointTy afterIP =
-      loopInfos.front()->getAfterIP();
+    loopInfos.front()->getAfterIP();
 
-  // Update the stack frame created for this loop to point to the resulting loop
-  // after applying transformations.
-  moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
-      [&](OpenMPLoopInfoStackFrame &frame) {
-        frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
-        return WalkResult::interrupt();
-      });
+  // Initialize the new loop info to the current one, in case there
+  // are no loop transformations done.
+  llvm::CanonicalLoopInfo *NewTopLoopInfo = nullptr;
+
+  // Do tiling
+  if (const auto &tiles = loopOp.getTileSizes()) {
+    llvm::Type *IVType = loopInfos.front()->getIndVarType();
+    SmallVector<llvm::Value *> TileSizes;
+
+    for (auto tile : tiles.value()) {
+      llvm::Value *TileVal =  llvm::ConstantInt::get(IVType, tile);
+      TileSizes.push_back(TileVal);
+    }
+
+    std::vector<llvm::CanonicalLoopInfo*> NewLoops =
+      ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes);
+
+    // Collapse loops. Store the insertion point because LoopInfos may get
+    // invalidated.
+    auto AfterBB = NewLoops.front()->getAfter();
+    auto AfterAfterBB = AfterBB->getSingleSuccessor();
+    afterIP = {AfterAfterBB, AfterAfterBB->begin()};
+    NewTopLoopInfo = NewLoops[0];
+
+    // Update the loop infos
+    loopInfos.clear();
+    for (const auto& newLoop : NewLoops) {
+      loopInfos.push_back(newLoop);
+    }
+  } // Tiling done
+
+  // Do collapse
+  if (const auto &numCollapse = loopOp.getNumCollapse()) {
+    SmallVector<llvm::CanonicalLoopInfo *> collapseLoopInfos(
+        loopInfos.begin(), loopInfos.begin() + (numCollapse));
+
+    auto newLoopInfo =
+        ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {});
+    NewTopLoopInfo = newLoopInfo;
+  } // Collapse done
+
+  // Update the stack frame created for this loop to point to the resulting
+  // loop after applying transformations.
+  if (NewTopLoopInfo) {
+    moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
+        [&](OpenMPLoopInfoStackFrame &frame) {
+          frame.loopInfo = NewTopLoopInfo;
+          return WalkResult::interrupt();
+        });
+  }
 
   // Continue building IR after the loop. Note that the LoopInfo returned by
   // `collapseLoops` points inside the outermost loop and is intended for

>From 09fded704ef2bac6f62b2b9a0b5ff0bcc5a5bfb9 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Tue, 10 Jun 2025 09:51:02 -0400
Subject: [PATCH 02/25] Fix tests and limit the nesting of construct to only
 tiling.

---
 flang/lib/Semantics/canonicalize-omp.cpp      | 34 ++++++++++++-------
 .../Lower/OpenMP/parallel-wsloop-lastpriv.f90 |  4 +--
 flang/test/Lower/OpenMP/simd.f90              |  2 +-
 flang/test/Lower/OpenMP/wsloop-variable.f90   |  2 +-
 flang/test/Semantics/OpenMP/do-collapse.f90   |  1 +
 .../LLVMIR/omptarget-wsloop-collapsed.mlir    |  2 +-
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      | 12 +++----
 7 files changed, 33 insertions(+), 24 deletions(-)

diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index fb0bb0f923574..10eaaa83f5f4f 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -149,27 +149,32 @@ class CanonicalizationOfOmp {
       // Ignore compiler directives.
       if (GetConstructIf<parser::CompilerDirective>(*nextIt))
         continue;
-
       // Keep track of the loops to handle the end loop directives
       std::stack<parser::OpenMPLoopConstruct *> loops;
       loops.push(&x);
-      while (auto *innerConstruct{
+      if (auto *innerConstruct{
           GetConstructIf<parser::OpenMPConstruct>(*nextIt)}) {
         if (auto *innerOmpLoop{
                 std::get_if<parser::OpenMPLoopConstruct>(&innerConstruct->u)}) {
-          std::get<
-              std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
-              loops.top()->t) = std::move(*innerOmpLoop);
-          // Retrieveing the address so that DoConstruct or inner loop can be
-          // set later.
-          loops.push(&(std::get<std::optional<
-                           common::Indirection<parser::OpenMPLoopConstruct>>>(
-              loops.top()->t)
-                           .value()
-                           .value()));
-          nextIt = block.erase(nextIt);
+          auto &innerBeginDir{
+              std::get<parser::OmpBeginLoopDirective>(innerOmpLoop->t)};
+          auto &innerDir{std::get<parser::OmpLoopDirective>(innerBeginDir.t)};
+          if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
+            std::get<std::optional<
+                common::Indirection<parser::OpenMPLoopConstruct>>>(
+                loops.top()->t) = std::move(*innerOmpLoop);
+            // Retrieveing the address so that DoConstruct or inner loop can be
+            // set later.
+            loops.push(&(std::get<std::optional<
+                             common::Indirection<parser::OpenMPLoopConstruct>>>(
+                loops.top()->t)
+                             .value()
+                             .value()));
+            nextIt = block.erase(nextIt);
+          }
         }
       }
+
       if (auto *doCons{GetConstructIf<parser::DoConstruct>(*nextIt)}) {
         if (doCons->GetLoopControl()) {
           // move DoConstruct
@@ -185,6 +190,9 @@ class CanonicalizationOfOmp {
                   loops.top()->t) = std::move(*endDir);
               nextIt = block.erase(nextIt);
               loops.pop();
+            } else {
+              // If there is a mismatch bail out.
+              break;
             }
           }
         } else {
diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90
index 2890e78e9d17f..faf8f717f6308 100644
--- a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90
+++ b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90
@@ -108,7 +108,7 @@ subroutine omp_do_lastprivate_collapse2(a)
   ! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
   ! CHECK-NEXT: %[[STEP2:.*]] = arith.constant 1 : i32
   ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
-  ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) {
+  ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) collapse(2) {
   ! CHECK:      %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! CHECK:      %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! CHECK:      %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -174,7 +174,7 @@ subroutine omp_do_lastprivate_collapse3(a)
   ! CHECK-NEXT: %[[UB3:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref<i32>
   ! CHECK-NEXT: %[[STEP3:.*]] = arith.constant 1 : i32
   ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[K_PVT_REF:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
-  ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) {
+  ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) collapse(3) {
   ! CHECK:      %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ea"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! CHECK:      %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   ! CHECK:      %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90
index d815474b84b31..3572b9baff00b 100644
--- a/flang/test/Lower/OpenMP/simd.f90
+++ b/flang/test/Lower/OpenMP/simd.f90
@@ -175,7 +175,7 @@ subroutine simd_with_collapse_clause(n)
   ! CHECK-NEXT: omp.loop_nest (%[[ARG_0:.*]], %[[ARG_1:.*]]) : i32 = (
   ! CHECK-SAME:                %[[LOWER_I]], %[[LOWER_J]]) to (
   ! CHECK-SAME:                %[[UPPER_I]], %[[UPPER_J]]) inclusive step (
-  ! CHECK-SAME:                %[[STEP_I]], %[[STEP_J]]) {
+  ! CHECK-SAME:                %[[STEP_I]], %[[STEP_J]]) collapse(2) {
   !$OMP SIMD COLLAPSE(2)
   do i = 1, n
     do j = 1, n
diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90
index a7fb5fb8936e7..cceb77b974fee 100644
--- a/flang/test/Lower/OpenMP/wsloop-variable.f90
+++ b/flang/test/Lower/OpenMP/wsloop-variable.f90
@@ -23,7 +23,7 @@ program wsloop_variable
 !CHECK:      %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64
 !CHECK:      %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64
 !CHECK:      omp.wsloop private({{.*}}) {
-!CHECK-NEXT:   omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
+!CHECK-NEXT:   omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) collapse(2) {
 !CHECK:          %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16
 !CHECK:          hlfir.assign %[[ARG0_I16]] to %[[STORE_IV0:.*]]#0 : i16, !fir.ref<i16>
 !CHECK:          hlfir.assign %[[ARG1]] to %[[STORE_IV1:.*]]#0 : i64, !fir.ref<i64>
diff --git a/flang/test/Semantics/OpenMP/do-collapse.f90 b/flang/test/Semantics/OpenMP/do-collapse.f90
index 480bd45b79b83..ec6a3bdad3686 100644
--- a/flang/test/Semantics/OpenMP/do-collapse.f90
+++ b/flang/test/Semantics/OpenMP/do-collapse.f90
@@ -31,6 +31,7 @@ program omp_doCollapse
       end do
     end do
 
+  !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct.
   !ERROR: At most one COLLAPSE clause can appear on the SIMD directive
   !$omp simd collapse(2) collapse(1)
   do i = 1, 4
diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
index 0ebcec0e0ec31..189e3bc57db86 100644
--- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
@@ -9,7 +9,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
     %loop_lb = llvm.mlir.constant(0 : i32) : i32
     %loop_step = llvm.mlir.constant(1 : index) : i32
     omp.wsloop {
-      omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) {
+      omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) collapse(2) {
         %1 = llvm.add %arg1, %arg2  : i32
         %2 = llvm.mul %arg2, %loop_ub overflow<nsw>  : i32
         %3 = llvm.add %arg1, %2 :i32
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 3f4dcd5e24c56..27210bc0890ce 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -698,7 +698,7 @@ llvm.func @simd_simple(%lb : i64, %ub : i64, %step : i64, %arg0: !llvm.ptr) {
 // CHECK-LABEL: @simd_simple_multiple
 llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) {
   omp.simd {
-    omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) {
+    omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) collapse(2) {
       %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
       // The form of the emitted IR is controlled by OpenMPIRBuilder and
       // tested there. Just check that the right metadata is added and collapsed
@@ -736,7 +736,7 @@ llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64
 // CHECK-LABEL: @simd_simple_multiple_simdlen
 llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) {
   omp.simd simdlen(2) {
-    omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
+    omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) {
       %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
       // The form of the emitted IR is controlled by OpenMPIRBuilder and
       // tested there. Just check that the right metadata is added.
@@ -760,7 +760,7 @@ llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l
 // CHECK-LABEL: @simd_simple_multiple_safelen
 llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) {
   omp.simd safelen(2) {
-    omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
+    omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) {
       %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
       %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
       %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr, f32
@@ -779,7 +779,7 @@ llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l
 // CHECK-LABEL: @simd_simple_multiple_simdlen_safelen
 llvm.func @simd_simple_multiple_simdlen_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) {
   omp.simd simdlen(1) safelen(2) {
-    omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
+    omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) {
       %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
       %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
       %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr, f32
@@ -1177,7 +1177,7 @@ llvm.func @collapse_wsloop(
     // CHECK: store i32 %[[TOTAL_SUB_1]], ptr
     // CHECK: call void @__kmpc_for_static_init_4u
     omp.wsloop {
-      omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
+      omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) {
         %31 = llvm.load %20 : !llvm.ptr -> i32
         %32 = llvm.add %31, %arg0 : i32
         %33 = llvm.add %32, %arg1 : i32
@@ -1239,7 +1239,7 @@ llvm.func @collapse_wsloop_dynamic(
     // CHECK: store i32 %[[TOTAL]], ptr
     // CHECK: call void @__kmpc_dispatch_init_4u
     omp.wsloop schedule(dynamic) {
-      omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
+      omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) {
         %31 = llvm.load %20 : !llvm.ptr -> i32
         %32 = llvm.add %31, %arg0 : i32
         %33 = llvm.add %32, %arg1 : i32

>From 4d4ac60bf9f97f5b1e683c93fb1eb03eb34ac79a Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Tue, 10 Jun 2025 10:18:32 -0400
Subject: [PATCH 03/25] Enable stand-alone tiling, but it gives a warning and
 converting to simd.

---
 flang/lib/Lower/OpenMP/OpenMP.cpp           | 42 ++++++++++++++++++---
 flang/test/Lower/OpenMP/wsloop-collapse.f90 |  2 +-
 2 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 7b352da3aa6b7..604d1e0167eef 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2267,6 +2267,39 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
   // Apply unrolling to it
   auto cli = canonLoop.getCli();
   mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli);
+
+static mlir::omp::LoopOp
+genTiledLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+          semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+          mlir::Location loc, const ConstructQueue &queue,
+          ConstructQueue::const_iterator item) {
+  mlir::omp::LoopOperands loopClauseOps;
+  llvm::SmallVector<const semantics::Symbol *> loopReductionSyms;
+  genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps,
+                 loopReductionSyms);
+
+  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
+                           /*shouldCollectPreDeterminedSymbols=*/true,
+                           /*useDelayedPrivatization=*/true, symTable);
+  dsp.processStep1(&loopClauseOps);
+
+  mlir::omp::LoopNestOperands loopNestClauseOps;
+  llvm::SmallVector<const semantics::Symbol *> iv;
+  genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
+                     loopNestClauseOps, iv);
+
+  EntryBlockArgs loopArgs;
+  loopArgs.priv.syms = dsp.getDelayedPrivSymbols();
+  loopArgs.priv.vars = loopClauseOps.privateVars;
+  loopArgs.reduction.syms = loopReductionSyms;
+  loopArgs.reduction.vars = loopClauseOps.reductionVars;
+
+  auto loopOp =
+      genWrapperOp<mlir::omp::LoopOp>(converter, loc, loopClauseOps, loopArgs);
+  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
+                loopNestClauseOps, iv, {{loopOp, loopArgs}},
+                llvm::omp::Directive::OMPD_loop, dsp);
+  return loopOp;
 }
 
 static mlir::omp::MaskedOp
@@ -3484,11 +3517,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
     newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
                        item);
     break;
-  case llvm::omp::Directive::OMPD_tile: {
-    unsigned version = semaCtx.langOptions().OpenMPVersion;
-    TODO(loc, "Unhandled loop directive (" +
-                  llvm::omp::getOpenMPDirectiveName(dir, version) + ")");
-  }
+  case llvm::omp::Directive::OMPD_tile:
+    newOp =
+      genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
+    break;
   case llvm::omp::Directive::OMPD_unroll:
     genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
     break;
diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90
index 7ec40ab4b2f43..677c7809c397f 100644
--- a/flang/test/Lower/OpenMP/wsloop-collapse.f90
+++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90
@@ -57,7 +57,7 @@ program wsloop_collapse
 !CHECK:           %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<i32>
 !CHECK:           %[[VAL_32:.*]] = arith.constant 1 : i32
 !CHECK:           omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_4:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_2:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_0:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
-!CHECK-NEXT:        omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) {
+!CHECK-NEXT:        omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) collapse(3) {
   !$omp do collapse(3)
   do i = 1, a
      do j= 1, b

>From 20a4df1c887bd1ae6e3d6de283e0f9f58e5fd0fc Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Wed, 11 Jun 2025 10:25:00 -0400
Subject: [PATCH 04/25] Add minimal test, remove debug print.

---
 flang/test/Lower/OpenMP/wsloop-tile.f90       | 30 +++++++++++++++++++
 .../Frontend/OpenMP/ConstructDecompositionT.h |  2 +-
 2 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 flang/test/Lower/OpenMP/wsloop-tile.f90

diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90
new file mode 100644
index 0000000000000..f43b558ce46bb
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-tile.f90
@@ -0,0 +1,30 @@
+! This test checks lowering of OpenMP DO Directive(Worksharing) with collapse.
+
+! RUN: bbc -fopenmp -fopenmp-version=51 -emit-hlfir %s -o - | FileCheck %s
+
+!CHECK-LABEL:   func.func @_QQmain() attributes {fir.bindc_name = "wsloop_tile"} {
+program wsloop_tile
+  integer :: i, j, k
+  integer :: a, b, c
+  integer :: x
+
+  a=30
+  b=20
+  c=50
+  x=0
+
+  !CHECK: omp.loop_nest
+  !CHECK-SAME: tiles(2, 5, 10)
+
+  !$omp do
+  !$omp tile sizes(2,5,10)
+  do i = 1, a
+     do j= 1, b
+        do k = 1, c
+           x = x + i + j + k
+        end do
+     end do
+  end do
+  !$omp end tile
+  !$omp end do
+end program wsloop_tile
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
index 83db78667c7f8..e1083b7ef2cd9 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -498,7 +498,7 @@ bool ConstructDecompositionT<C, H>::applyClause(
       last.clauses.push_back(node);
       return true;
     } else {
-      llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n";
+      //      llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n";
       last.clauses.push_back(node);
       return true;
     }

>From cfcd95c78d265a1823170ccabfa484fb0ed236b3 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Fri, 13 Jun 2025 09:53:10 -0400
Subject: [PATCH 05/25] Fix formatting

---
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 27 +++++++++++--------
 flang/lib/Lower/OpenMP/Utils.cpp              |  3 +--
 flang/lib/Semantics/canonicalize-omp.cpp      |  4 +--
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |  3 +--
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 12 ++++-----
 5 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 604d1e0167eef..6e41756bf7b89 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -426,9 +426,10 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
               if (innerOptional.has_value()) {
                 const auto &innerLoopDirective = innerOptional.value().value();
                 const auto &innerBegin =
-                  std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
+                    std::get<parser::OmpBeginLoopDirective>(
+                        innerLoopDirective.t);
                 const auto &innerDirective =
-                  std::get<parser::OmpLoopDirective>(innerBegin.t);
+                    std::get<parser::OmpLoopDirective>(innerBegin.t);
                 if (innerDirective.v == llvm::omp::Directive::OMPD_tile) {
                   middleClauseList =
                       &std::get<parser::OmpClauseList>(innerBegin.t);
@@ -2268,11 +2269,13 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
   auto cli = canonLoop.getCli();
   mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli);
 
-static mlir::omp::LoopOp
-genTiledLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
-          semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
-          mlir::Location loc, const ConstructQueue &queue,
-          ConstructQueue::const_iterator item) {
+static mlir::omp::LoopOp genTiledLoopOp(lower::AbstractConverter &converter,
+                                        lower::SymMap &symTable,
+                                        semantics::SemanticsContext &semaCtx,
+                                        lower::pft::Evaluation &eval,
+                                        mlir::Location loc,
+                                        const ConstructQueue &queue,
+                                        ConstructQueue::const_iterator item) {
   mlir::omp::LoopOperands loopClauseOps;
   llvm::SmallVector<const semantics::Symbol *> loopReductionSyms;
   genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps,
@@ -3519,7 +3522,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
     break;
   case llvm::omp::Directive::OMPD_tile:
     newOp =
-      genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
+        genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
     break;
   case llvm::omp::Directive::OMPD_unroll:
     genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
@@ -3934,13 +3937,15 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
   List<Clause> clauses = makeClauses(
       std::get<parser::OmpClauseList>(beginLoopDirective.t), semaCtx);
 
-  const auto &innerOptional = std::get<std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(loopConstruct.t);
+  const auto &innerOptional =
+      std::get<std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
+          loopConstruct.t);
   if (innerOptional.has_value()) {
     const auto &innerLoopDirective = innerOptional.value().value();
     const auto &innerBegin =
-      std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
+        std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
     const auto &innerDirective =
-      std::get<parser::OmpLoopDirective>(innerBegin.t);
+        std::get<parser::OmpLoopDirective>(innerBegin.t);
     if (innerDirective.v == llvm::omp::Directive::OMPD_tile) {
       clauses.append(
           makeClauses(std::get<parser::OmpClauseList>(innerBegin.t), semaCtx));
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 7e55726d3b721..54a7225d8ed16 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -51,8 +51,7 @@ int64_t getCollapseValue(const List<Clause> &clauses) {
   }
 
   collapseValue = collapseValue - numTileSizes;
-  int64_t result =
-    collapseValue > numTileSizes ? collapseValue : numTileSizes;
+  int64_t result = collapseValue > numTileSizes ? collapseValue : numTileSizes;
   return result;
 }
 
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index 10eaaa83f5f4f..c519cb43628ed 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -11,7 +11,7 @@
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/semantics.h"
 
-# include <stack>
+#include <stack>
 // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP
 // Constructs more structured which provide explicit scopes for later
 // structural checks and semantic analysis.
@@ -153,7 +153,7 @@ class CanonicalizationOfOmp {
       std::stack<parser::OpenMPLoopConstruct *> loops;
       loops.push(&x);
       if (auto *innerConstruct{
-          GetConstructIf<parser::OpenMPConstruct>(*nextIt)}) {
+              GetConstructIf<parser::OpenMPConstruct>(*nextIt)}) {
         if (auto *innerOmpLoop{
                 std::get_if<parser::OpenMPLoopConstruct>(&innerConstruct->u)}) {
           auto &innerBeginDir{
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 0bef00fed1ba0..e2b63afe48a23 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -2995,8 +2995,7 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) {
   };
 
   if (!parser.parseOptionalKeyword("tiles") &&
-      (parser.parseLParen() ||
-      parser.parseCommaSeparatedList(parseTiles) ||
+      (parser.parseLParen() || parser.parseCommaSeparatedList(parseTiles) ||
        parser.parseRParen()))
     return failure();
 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 5e1453bcb6de5..655645726d27c 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2966,7 +2966,7 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
 /// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
-                        LLVM::ModuleTranslation &moduleTranslation) {
+                   LLVM::ModuleTranslation &moduleTranslation) {
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
   auto loopOp = cast<omp::LoopNestOp>(opInst);
   // Set up the source location value for OpenMP runtime.
@@ -3036,7 +3036,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
 
   //  llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP();
   llvm::OpenMPIRBuilder::InsertPointTy afterIP =
-    loopInfos.front()->getAfterIP();
+      loopInfos.front()->getAfterIP();
 
   // Initialize the new loop info to the current one, in case there
   // are no loop transformations done.
@@ -3048,12 +3048,12 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
     SmallVector<llvm::Value *> TileSizes;
 
     for (auto tile : tiles.value()) {
-      llvm::Value *TileVal =  llvm::ConstantInt::get(IVType, tile);
+      llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile);
       TileSizes.push_back(TileVal);
     }
 
-    std::vector<llvm::CanonicalLoopInfo*> NewLoops =
-      ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes);
+    std::vector<llvm::CanonicalLoopInfo *> NewLoops =
+        ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes);
 
     // Collapse loops. Store the insertion point because LoopInfos may get
     // invalidated.
@@ -3064,7 +3064,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
 
     // Update the loop infos
     loopInfos.clear();
-    for (const auto& newLoop : NewLoops) {
+    for (const auto &newLoop : NewLoops) {
       loopInfos.push_back(newLoop);
     }
   } // Tiling done

>From c43947c80eca400c39460143afb4db2e5caf06f7 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sat, 14 Jun 2025 06:29:58 -0400
Subject: [PATCH 06/25] Fix formatting

---
 flang/lib/Semantics/canonicalize-omp.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index c519cb43628ed..1d00bdaad777c 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -166,10 +166,10 @@ class CanonicalizationOfOmp {
             // Retrieveing the address so that DoConstruct or inner loop can be
             // set later.
             loops.push(&(std::get<std::optional<
-                             common::Indirection<parser::OpenMPLoopConstruct>>>(
+                    common::Indirection<parser::OpenMPLoopConstruct>>>(
                 loops.top()->t)
-                             .value()
-                             .value()));
+                    .value()
+                    .value()));
             nextIt = block.erase(nextIt);
           }
         }

>From 2448d0de16018936e1e702d73c165436916c1dd4 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Thu, 19 Jun 2025 15:52:55 -0400
Subject: [PATCH 07/25] Fix test.

---
 flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 | 1 +
 1 file changed, 1 insertion(+)

diff --git a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90
index bb1929249183b..355626f6e73b9 100644
--- a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90
+++ b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90
@@ -1,6 +1,7 @@
 !RUN: %python %S/../test_errors.py %s %flang -fopenmp
 
 integer :: i, j
+! ERROR: DO CONCURRENT loops cannot be used with the COLLAPSE clause.
 !$omp parallel do collapse(2)
 do i = 1, 1
   ! ERROR: DO CONCURRENT loops cannot form part of a loop nest.

>From 2fe39d15a091c2fe581e14d8846d21cd9f27e7d6 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Fri, 20 Jun 2025 07:31:02 -0400
Subject: [PATCH 08/25] Add more mlir tests. Set collapse value when lowering
 from SCF to OpenMP.

---
 .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp    |  4 +-
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 12 +++++
 .../Conversion/SCFToOpenMP/scf-to-openmp.mlir |  2 +-
 mlir/test/Dialect/OpenMP/invalid.mlir         | 23 ++++++++
 mlir/test/Dialect/OpenMP/ops.mlir             | 54 +++++++++++++++++++
 5 files changed, 92 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
index bec17258d058f..f056e72531bfc 100644
--- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
+++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
@@ -493,8 +493,8 @@ struct ParallelOpLowering : public OpRewritePattern<scf::ParallelOp> {
         // Create loop nest and populate region with contents of scf.parallel.
         auto loopOp = omp::LoopNestOp::create(
             rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(),
-            parallelOp.getUpperBound(), parallelOp.getStep(), false, 1,
-            nullptr);
+            parallelOp.getUpperBound(), parallelOp.getStep(), false,
+            parallelOp.getLowerBound().size(), nullptr);
 
         rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(),
                                     loopOp.getRegion().begin());
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index e2b63afe48a23..9f6f9d9b91d0a 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -3058,6 +3058,18 @@ LogicalResult LoopNestOp::verify() {
              << "range argument type does not match corresponding IV type";
   }
 
+  uint64_t numIVs = getIVs().size();
+
+  if (const auto &numCollapse = getNumCollapse())
+    if (numCollapse > numIVs)
+      return emitOpError()
+             << "collapse value is larger than the number of loops";
+
+  if (const auto &tiles = getTileSizes())
+    if (tiles.value().size() > numIVs)
+      return emitOpError()
+             << "number of tilings is larger than the number of loops";
+
   if (!llvm::dyn_cast_if_present<LoopWrapperInterface>((*this)->getParentOp()))
     return emitOpError() << "expects parent op to be a loop wrapper";
 
diff --git a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
index a722acbf2c347..d362bb6092419 100644
--- a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
+++ b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
@@ -6,7 +6,7 @@ func.func @parallel(%arg0: index, %arg1: index, %arg2: index,
   // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32
   // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) {
   // CHECK: omp.wsloop {
-  // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
+  // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) collapse(2) {
   // CHECK: memref.alloca_scope
   scf.parallel (%i, %j) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
     // CHECK: "test.payload"(%[[LVAR1]], %[[LVAR2]]) : (index, index) -> ()
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 5088f2dfa7d7a..c6b4ae02602d9 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -157,6 +157,29 @@ func.func @no_loops(%lb : index, %ub : index, %step : index) {
   }
 }
 
+// -----
+
+func.func @collapse_size(%lb : index, %ub : index, %step : index) {
+  omp.wsloop {
+    // expected-error at +1 {{collapse value is larger than the number of loops}}
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) collapse(4) {
+      omp.yield
+    }
+  }
+}
+
+// -----
+
+func.func @tiles_length(%lb : index, %ub : index, %step : index) {
+  omp.wsloop {
+    // expected-error at +1 {{number of tilings is larger than the number of loops}}
+    omp.loop_nest (%iv) : index =  (%lb) to (%ub) step (%step) tiles(2, 4) {
+      omp.yield
+    }
+  }
+}
+
+
 // -----
 
 func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) {
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 8c846cde1a3ca..e627a86e69185 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -376,6 +376,60 @@ func.func @omp_loop_nest_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32,
   return
 }
 
+// CHECK-LABEL: omp_loop_nest_pretty_multiple_collapse
+func.func @omp_loop_nest_pretty_multiple_collapse(%lb1 : i32, %ub1 : i32, %step1 : i32,
+    %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref<?xi32>) -> () {
+
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) collapse(2)
+    omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) {
+      %1 = "test.payload"(%iv1) : (i32) -> (index)
+      %2 = "test.payload"(%iv2) : (i32) -> (index)
+      memref.store %iv1, %data1[%1] : memref<?xi32>
+      memref.store %iv2, %data1[%2] : memref<?xi32>
+      omp.yield
+    }
+  }
+
+  return
+}
+
+// CHECK-LABEL: omp_loop_nest_pretty_multiple_tiles
+func.func @omp_loop_nest_pretty_multiple_tiles(%lb1 : i32, %ub1 : i32, %step1 : i32,
+    %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref<?xi32>) -> () {
+
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) tiles(5, 10)
+    omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) tiles(5, 10) {
+      %1 = "test.payload"(%iv1) : (i32) -> (index)
+      %2 = "test.payload"(%iv2) : (i32) -> (index)
+      memref.store %iv1, %data1[%1] : memref<?xi32>
+      memref.store %iv2, %data1[%2] : memref<?xi32>
+      omp.yield
+    }
+  }
+
+  return
+}
+
+// CHECK-LABEL: omp_loop_nest_pretty_multiple_collapse_tiles
+func.func @omp_loop_nest_pretty_multiple_collapse_tiles(%lb1 : i32, %ub1 : i32, %step1 : i32,
+    %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref<?xi32>) -> () {
+
+  omp.wsloop {
+    // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) collapse(2) tiles(5, 10)
+    omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) tiles(5, 10) {
+      %1 = "test.payload"(%iv1) : (i32) -> (index)
+      %2 = "test.payload"(%iv2) : (i32) -> (index)
+      memref.store %iv1, %data1[%1] : memref<?xi32>
+      memref.store %iv2, %data1[%2] : memref<?xi32>
+      omp.yield
+    }
+  }
+
+  return
+}
+
 // CHECK-LABEL: omp_wsloop
 func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32) -> () {
 

>From 0b8f4bcc516d9182b1dbede12f6bf221951d266e Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Fri, 20 Jun 2025 07:52:28 -0400
Subject: [PATCH 09/25] Use llvm::SmallVector instead of std::stack

---
 flang/lib/Semantics/canonicalize-omp.cpp | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index 1d00bdaad777c..5264ec25fd80c 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -10,8 +10,6 @@
 #include "flang/Parser/parse-tree-visitor.h"
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/semantics.h"
-
-#include <stack>
 // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP
 // Constructs more structured which provide explicit scopes for later
 // structural checks and semantic analysis.
@@ -150,8 +148,8 @@ class CanonicalizationOfOmp {
       if (GetConstructIf<parser::CompilerDirective>(*nextIt))
         continue;
       // Keep track of the loops to handle the end loop directives
-      std::stack<parser::OpenMPLoopConstruct *> loops;
-      loops.push(&x);
+      llvm::SmallVector<parser::OpenMPLoopConstruct *> loops;
+      loops.push_back(&x);
       if (auto *innerConstruct{
               GetConstructIf<parser::OpenMPConstruct>(*nextIt)}) {
         if (auto *innerOmpLoop{
@@ -162,12 +160,12 @@ class CanonicalizationOfOmp {
           if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
             std::get<std::optional<
                 common::Indirection<parser::OpenMPLoopConstruct>>>(
-                loops.top()->t) = std::move(*innerOmpLoop);
+                loops.back()->t) = std::move(*innerOmpLoop);
             // Retrieveing the address so that DoConstruct or inner loop can be
             // set later.
-            loops.push(&(std::get<std::optional<
+            loops.push_back(&(std::get<std::optional<
                     common::Indirection<parser::OpenMPLoopConstruct>>>(
-                loops.top()->t)
+                loops.back()->t)
                     .value()
                     .value()));
             nextIt = block.erase(nextIt);
@@ -180,16 +178,16 @@ class CanonicalizationOfOmp {
           // move DoConstruct
           std::get<std::optional<std::variant<parser::DoConstruct,
               common::Indirection<parser::OpenMPLoopConstruct>>>>(
-              loops.top()->t) = std::move(*doCons);
+              loops.back()->t) = std::move(*doCons);
           nextIt = block.erase(nextIt);
           // try to match OmpEndLoopDirective
           while (nextIt != block.end() && !loops.empty()) {
             if (auto *endDir{
                     GetConstructIf<parser::OmpEndLoopDirective>(*nextIt)}) {
               std::get<std::optional<parser::OmpEndLoopDirective>>(
-                  loops.top()->t) = std::move(*endDir);
+                  loops.back()->t) = std::move(*endDir);
               nextIt = block.erase(nextIt);
-              loops.pop();
+              loops.pop_back();
             } else {
               // If there is a mismatch bail out.
               break;

>From b24d68eea0e30fcbf702ce4102b3e93fe5aca111 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sat, 21 Jun 2025 09:11:58 -0400
Subject: [PATCH 10/25] Improve test a bit to make sure IVs are used as
 expected.

---
 flang/test/Lower/OpenMP/wsloop-tile.f90 | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90
index f43b558ce46bb..c9bf18e3b278d 100644
--- a/flang/test/Lower/OpenMP/wsloop-tile.f90
+++ b/flang/test/Lower/OpenMP/wsloop-tile.f90
@@ -13,7 +13,7 @@ program wsloop_tile
   c=50
   x=0
 
-  !CHECK: omp.loop_nest
+  !CHECK: omp.loop_nest (%[[IV_0:.*]], %[[IV_1:.*]], %[[IV_2:.*]]) : i32
   !CHECK-SAME: tiles(2, 5, 10)
 
   !$omp do
@@ -21,6 +21,15 @@ program wsloop_tile
   do i = 1, a
      do j= 1, b
         do k = 1, c
+  !CHECK: hlfir.assign %[[IV_0]] to %[[IV_0A:.*]] : i32
+  !CHECK: hlfir.assign %[[IV_1]] to %[[IV_1A:.*]] : i32
+  !CHECK: hlfir.assign %[[IV_2]] to %[[IV_2A:.*]] : i32
+  !CHECK: %[[IVV_0:.*]] = fir.load %[[IV_0A]]
+  !CHECK: %[[SUM0:.*]] = arith.addi %{{.*}}, %[[IVV_0]] : i32
+  !CHECK: %[[IVV_1:.*]] = fir.load %[[IV_1A]]
+  !CHECK: %[[SUM1:.*]] = arith.addi %[[SUM0]], %[[IVV_1]] : i32
+  !CHECK: %[[IVV_2:.*]] = fir.load %[[IV_2A]]
+  !CHECK: %[[SUM2:.*]] = arith.addi %[[SUM1]], %[[IVV_2]] : i32
            x = x + i + j + k
         end do
      end do

>From 732a785d6b61f32270d6bb81e0f956421bf45847 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sat, 21 Jun 2025 09:30:57 -0400
Subject: [PATCH 11/25] Fix comments to clarify canonicalization.

---
 flang/lib/Semantics/canonicalize-omp.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index 5264ec25fd80c..d5b5b14d22dc2 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -119,13 +119,15 @@ class CanonicalizationOfOmp {
     //   ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t->
     //     OmpBeginLoopDirective t-> OmpLoopDirective
     //   [ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct u->
-    ///    OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile]
+    //     OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile]
     //   ExecutableConstruct -> DoConstruct
-    //   [ExecutableConstruct -> OmpEndLoopDirective]
+    //   [ExecutableConstruct -> OmpEndLoopDirective] (note: tile)
     //   ExecutableConstruct -> OmpEndLoopDirective (if available)
     //
     // After rewriting:
     //   ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t->
+    //     [OpenMPLoopConstruct t -> OmpBeginLoopDirective -> OmpLoopDirective
+    //                               OmpEndLoopDirective] (note: tile)
     //     OmpBeginLoopDirective t -> OmpLoopDirective -> DoConstruct
     //     OmpEndLoopDirective (if available)
     parser::Block::iterator nextIt;

>From 93aafb27ba5e1829f43383087b5f73ea25ec15f7 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sat, 21 Jun 2025 12:05:40 -0400
Subject: [PATCH 12/25] Special handling of tile directive when dealing with
 start end end loop directives.

---
 flang/lib/Semantics/canonicalize-omp.cpp | 31 ++++++++++++++++--------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index d5b5b14d22dc2..a7749b5a81678 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -160,16 +160,13 @@ class CanonicalizationOfOmp {
               std::get<parser::OmpBeginLoopDirective>(innerOmpLoop->t)};
           auto &innerDir{std::get<parser::OmpLoopDirective>(innerBeginDir.t)};
           if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
-            std::get<std::optional<
+            auto &innerLoop = std::get<std::optional<
                 common::Indirection<parser::OpenMPLoopConstruct>>>(
-                loops.back()->t) = std::move(*innerOmpLoop);
+                loops.back()->t);
+            innerLoop = std::move(*innerOmpLoop);
             // Retrieveing the address so that DoConstruct or inner loop can be
             // set later.
-            loops.push_back(&(std::get<std::optional<
-                    common::Indirection<parser::OpenMPLoopConstruct>>>(
-                loops.back()->t)
-                    .value()
-                    .value()));
+            loops.push_back(&(innerLoop.value().value()));
             nextIt = block.erase(nextIt);
           }
         }
@@ -186,9 +183,23 @@ class CanonicalizationOfOmp {
           while (nextIt != block.end() && !loops.empty()) {
             if (auto *endDir{
                     GetConstructIf<parser::OmpEndLoopDirective>(*nextIt)}) {
-              std::get<std::optional<parser::OmpEndLoopDirective>>(
-                  loops.back()->t) = std::move(*endDir);
-              nextIt = block.erase(nextIt);
+              auto &endOmpDirective{
+                  std::get<parser::OmpLoopDirective>(endDir->t)};
+              auto &loopBegin{
+                  std::get<parser::OmpBeginLoopDirective>(loops.back()->t)};
+              auto &loopDir{std::get<parser::OmpLoopDirective>(loopBegin.t)};
+
+              // If the directive is a tile we try to match the corresponding
+              // end tile if it exsists. If it is not a tile directive we
+              // always assign the end loop directive and fall back on the
+              // existing directive structure checks.
+              if (loopDir.v != llvm::omp::Directive::OMPD_tile ||
+                  loopDir.v == endOmpDirective.v) {
+                std::get<std::optional<parser::OmpEndLoopDirective>>(
+                    loops.back()->t) = std::move(*endDir);
+                nextIt = block.erase(nextIt);
+              }
+
               loops.pop_back();
             } else {
               // If there is a mismatch bail out.

>From ef9213582a0c6b4c635b825b1ed88fdd4adffa29 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sat, 21 Jun 2025 12:20:33 -0400
Subject: [PATCH 13/25] Inline functions.

---
 flang/lib/Semantics/resolve-directives.cpp | 62 +++++++++-------------
 1 file changed, 24 insertions(+), 38 deletions(-)

diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 5d3aa96964318..22b5cf8941f44 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -827,14 +827,6 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor<llvm::omp::Directive> {
   void CollectAssociatedLoopLevelsFromInnerLoopContruct(
       const parser::OpenMPLoopConstruct &, llvm::SmallVector<std::int64_t> &,
       llvm::SmallVector<const parser::OmpClause *> &);
-  template <typename T>
-  void CollectAssociatedLoopLevelFromClauseValue(
-      const parser::OmpClause &clause, llvm::SmallVector<std::int64_t> &,
-      llvm::SmallVector<const parser::OmpClause *> &);
-  template <typename T>
-  void CollectAssociatedLoopLevelFromClauseSize(const parser::OmpClause &,
-      llvm::SmallVector<std::int64_t> &,
-      llvm::SmallVector<const parser::OmpClause *> &);
   void CollectAssociatedLoopLevelsFromClauses(const parser::OmpClauseList &,
       llvm::SmallVector<std::int64_t> &,
       llvm::SmallVector<const parser::OmpClause *> &);
@@ -2074,40 +2066,34 @@ void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct(
   }
 }
 
-template <typename T>
-void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseValue(
-    const parser::OmpClause &clause, llvm::SmallVector<std::int64_t> &levels,
-    llvm::SmallVector<const parser::OmpClause *> &clauses) {
-  if (const auto tclause{std::get_if<T>(&clause.u)}) {
-    std::int64_t level = 0;
-    if (const auto v{EvaluateInt64(context_, tclause->v)}) {
-      level = *v;
-    }
-    levels.push_back(level);
-    clauses.push_back(&clause);
-  }
-}
-
-template <typename T>
-void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseSize(
-    const parser::OmpClause &clause, llvm::SmallVector<std::int64_t> &levels,
-    llvm::SmallVector<const parser::OmpClause *> &clauses) {
-  if (const auto tclause{std::get_if<T>(&clause.u)}) {
-    levels.push_back(tclause->v.size());
-    clauses.push_back(&clause);
-  }
-}
-
 void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromClauses(
     const parser::OmpClauseList &x, llvm::SmallVector<std::int64_t> &levels,
     llvm::SmallVector<const parser::OmpClause *> &clauses) {
   for (const auto &clause : x.v) {
-    CollectAssociatedLoopLevelFromClauseValue<parser::OmpClause::Ordered>(
-        clause, levels, clauses);
-    CollectAssociatedLoopLevelFromClauseValue<parser::OmpClause::Collapse>(
-        clause, levels, clauses);
-    CollectAssociatedLoopLevelFromClauseSize<parser::OmpClause::Sizes>(
-        clause, levels, clauses);
+    if (const auto oclause{
+            std::get_if<parser::OmpClause::Ordered>(&clause.u)}) {
+      std::int64_t level = 0;
+      if (const auto v{EvaluateInt64(context_, oclause->v)}) {
+        level = *v;
+      }
+      levels.push_back(level);
+      clauses.push_back(&clause);
+    }
+
+    if (const auto cclause{
+            std::get_if<parser::OmpClause::Collapse>(&clause.u)}) {
+      std::int64_t level = 0;
+      if (const auto v{EvaluateInt64(context_, cclause->v)}) {
+        level = *v;
+      }
+      levels.push_back(level);
+      clauses.push_back(&clause);
+    }
+
+    if (const auto tclause{std::get_if<parser::OmpClause::Sizes>(&clause.u)}) {
+      levels.push_back(tclause->v.size());
+      clauses.push_back(&clause);
+    }
   }
 }
 

>From 765af1ccf80adbf79685533adab32db7e9a78e24 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Mon, 23 Jun 2025 11:03:50 -0400
Subject: [PATCH 14/25] Remove debug code.

---
 llvm/lib/Transforms/Utils/CodeExtractor.cpp | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 921e2ff2506fc..7a9dd37b72205 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -810,11 +810,7 @@ void CodeExtractor::severSplitPHINodesOfExits() {
 }
 
 void CodeExtractor::splitReturnBlocks() {
-  for (BasicBlock *Block : Blocks) {
-    if (!Block->getTerminator()) {
-      errs() << "====== No terminator in block: " << Block->getName()
-             << "======\n";
-    }
+  for (BasicBlock *Block : Blocks)
     if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
       BasicBlock *New =
           Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret");
@@ -831,7 +827,6 @@ void CodeExtractor::splitReturnBlocks() {
           DT->changeImmediateDominator(I, NewNode);
       }
     }
-  }
 }
 
 Function *CodeExtractor::constructFunctionDeclaration(

>From 26f441ad2e884a8ba38f6d0160bc554bba60e79b Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Mon, 23 Jun 2025 11:08:42 -0400
Subject: [PATCH 15/25] Reuse loop op lowering, add comment.

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 6e41756bf7b89..a9b7b5647f1dd 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3522,7 +3522,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
     break;
   case llvm::omp::Directive::OMPD_tile:
     newOp =
-        genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
+        genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
     break;
   case llvm::omp::Directive::OMPD_unroll:
     genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);

>From 3a7002d7ed2c7995b8483073c5fdcda429fe5049 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Mon, 23 Jun 2025 11:12:51 -0400
Subject: [PATCH 16/25] Fix formatting.

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index a9b7b5647f1dd..9a00ddc609755 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3521,8 +3521,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
                        item);
     break;
   case llvm::omp::Directive::OMPD_tile:
-    newOp =
-        genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
+    newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
     break;
   case llvm::omp::Directive::OMPD_unroll:
     genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);

>From b045881706da2efda0a2c3155e845e5022da43a6 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Mon, 23 Jun 2025 11:21:03 -0400
Subject: [PATCH 17/25] Remove curly braces.

---
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 9f6f9d9b91d0a..b235671cc7be7 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -2978,11 +2978,10 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) {
       (parser.parseLParen() || parser.parseInteger(value) ||
        parser.parseRParen()))
     return failure();
-  if (value > 1) {
+  if (value > 1)
     result.addAttribute(
         "num_collapse",
         IntegerAttr::get(parser.getBuilder().getI64Type(), value));
-  }
 
   // Parse tiles
   SmallVector<int64_t> tiles;
@@ -2999,9 +2998,8 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) {
        parser.parseRParen()))
     return failure();
 
-  if (tiles.size() > 0) {
+  if (tiles.size() > 0)
     result.addAttribute("tile_sizes", DenseI64ArrayAttr::get(ctx, tiles));
-  }
 
   // Parse the body.
   Region *region = result.addRegion();
@@ -3026,13 +3024,13 @@ void LoopNestOp::print(OpAsmPrinter &p) {
   if (getLoopInclusive())
     p << "inclusive ";
   p << "step (" << getLoopSteps() << ") ";
-  if (int64_t numCollapse = getNumCollapse()) {
+  if (int64_t numCollapse = getNumCollapse())
     if (numCollapse > 1)
       p << "collapse(" << numCollapse << ") ";
-  }
-  if (const auto tiles = getTileSizes()) {
+
+  if (const auto tiles = getTileSizes())
     p << "tiles(" << tiles.value() << ") ";
-  }
+
   p.printRegion(region, /*printEntryBlockArgs=*/false);
 }
 

>From 131ade1b07fc5864f7e539f4e75dd2ca3fc8c2b5 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Wed, 25 Jun 2025 10:11:36 -0400
Subject: [PATCH 18/25] Avoid attaching the sizes clause to the parent
 construct, instead find the tile sizes through the parse tree when getting
 the information needed to create the loop nest ops.

---
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 23 ++---
 flang/lib/Lower/OpenMP/Utils.cpp              | 90 ++++++++++++++++++-
 flang/lib/Lower/OpenMP/Utils.h                |  5 ++
 .../Frontend/OpenMP/ConstructDecompositionT.h |  4 -
 4 files changed, 99 insertions(+), 23 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 9a00ddc609755..b8aeba6b31fe7 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -46,6 +46,7 @@
 
 using namespace Fortran::lower::omp;
 using namespace Fortran::common::openmp;
+using namespace Fortran::semantics;
 
 //===----------------------------------------------------------------------===//
 // Code generation helper functions
@@ -1690,6 +1691,7 @@ genLoopNestClauses(lower::AbstractConverter &converter,
       int64_t collapseValue = evaluate::ToInt64(collapse.v).value();
       clauseOps.numCollapse = firOpBuilder.getI64IntegerAttr(collapseValue);
     } else if (clause.id == llvm::omp::Clause::OMPC_sizes) {
+      // This case handles the stand-alone tiling construct
       const auto &sizes = std::get<clause::Sizes>(clause.u);
       llvm::SmallVector<int64_t> sizeValues;
       for (auto &size : sizes.v) {
@@ -1699,6 +1701,12 @@ genLoopNestClauses(lower::AbstractConverter &converter,
       clauseOps.tileSizes = sizeValues;
     }
   }
+
+  llvm::SmallVector<int64_t> sizeValues;
+  auto *ompCons{eval.getIf<parser::OpenMPConstruct>()};
+  collectTileSizesFromOpenMPConstruct (ompCons, sizeValues, semaCtx);
+  if (sizeValues.size() > 0)
+    clauseOps.tileSizes = sizeValues;
 }
 
 static void genLoopClauses(
@@ -3936,21 +3944,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
   List<Clause> clauses = makeClauses(
       std::get<parser::OmpClauseList>(beginLoopDirective.t), semaCtx);
 
-  const auto &innerOptional =
-      std::get<std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
-          loopConstruct.t);
-  if (innerOptional.has_value()) {
-    const auto &innerLoopDirective = innerOptional.value().value();
-    const auto &innerBegin =
-        std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
-    const auto &innerDirective =
-        std::get<parser::OmpLoopDirective>(innerBegin.t);
-    if (innerDirective.v == llvm::omp::Directive::OMPD_tile) {
-      clauses.append(
-          makeClauses(std::get<parser::OmpClauseList>(innerBegin.t), semaCtx));
-    }
-  }
-
   if (auto &endLoopDirective =
           std::get<std::optional<parser::OmpEndLoopDirective>>(
               loopConstruct.t)) {
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 54a7225d8ed16..3459e1bedfc0c 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -14,6 +14,7 @@
 
 #include "ClauseFinder.h"
 #include "flang/Lower/OpenMP/Clauses.h"
+#include "flang/Evaluate/fold.h"
 #include <flang/Lower/AbstractConverter.h>
 #include <flang/Lower/ConvertType.h>
 #include <flang/Lower/DirectivesCommon.h>
@@ -24,10 +25,31 @@
 #include <flang/Parser/parse-tree.h>
 #include <flang/Parser/tools.h>
 #include <flang/Semantics/tools.h>
+#include <flang/Semantics/type.h>
 #include <llvm/Support/CommandLine.h>
 
 #include <iterator>
 
+using namespace Fortran::semantics;
+
+template <typename T>
+MaybeIntExpr
+EvaluateIntExpr(SemanticsContext &context, const T &expr) {
+  if (MaybeExpr maybeExpr{
+          Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) {
+    if (auto *intExpr{Fortran::evaluate::UnwrapExpr<SomeIntExpr>(*maybeExpr)}) {
+      return std::move(*intExpr);
+    }
+  }
+  return std::nullopt;
+}
+
+template <typename T>
+std::optional<std::int64_t>
+EvaluateInt64(SemanticsContext &context, const T &expr) {
+  return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr));
+}
+
 llvm::cl::opt<bool> treatIndexAsSection(
     "openmp-treat-index-as-section",
     llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."),
@@ -614,6 +636,43 @@ static void convertLoopBounds(lower::AbstractConverter &converter,
   }
 }
 
+// Populates the sizes vector with values if the given OpenMPConstruct
+// Contains a loop construct with an inner tiling construct.
+void collectTileSizesFromOpenMPConstruct(
+    const parser::OpenMPConstruct *ompCons,
+    llvm::SmallVectorImpl<int64_t> &tileSizes,
+    SemanticsContext &semaCtx) {
+  if (!ompCons)
+    return;
+
+  if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(&ompCons->u)}) {
+    const auto &innerOptional = std::get<
+        std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
+        ompLoop->t);
+    if (innerOptional.has_value()) {
+      const auto &innerLoopDirective = innerOptional.value().value();
+      const auto &innerBegin =
+          std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
+      const auto &innerDirective =
+          std::get<parser::OmpLoopDirective>(innerBegin.t).v;
+
+      if (innerDirective == llvm::omp::Directive::OMPD_tile) {
+        // Get the size values from parse tree and convert to a vector
+        const auto &innerClauseList{
+            std::get<parser::OmpClauseList>(innerBegin.t)};
+        for (const auto &clause : innerClauseList.v)
+          if (const auto tclause{
+                  std::get_if<parser::OmpClause::Sizes>(&clause.u)}) {
+            for (auto &tval : tclause->v) {
+              if (const auto v{EvaluateInt64(semaCtx, tval)})
+                tileSizes.push_back(*v);
+            }
+          }
+      }
+    }
+  }
+}
+
 bool collectLoopRelatedInfo(
     lower::AbstractConverter &converter, mlir::Location currentLocation,
     lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses,
@@ -635,11 +694,34 @@ bool collectLoopRelatedInfo(
     collapseValue = evaluate::ToInt64(clause->v).value();
     found = true;
   }
+
+  // Collect sizes from tile directive if present
   std::int64_t sizesLengthValue = 0l;
-  if (auto *clause =
-          ClauseFinder::findUniqueClause<omp::clause::Sizes>(clauses)) {
-    sizesLengthValue = clause->v.size();
-    found = true;
+  if (auto *ompCons{eval.getIf<parser::OpenMPConstruct>()}) {
+    if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(&ompCons->u)}) {
+      const auto &innerOptional = std::get<
+          std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
+          ompLoop->t);
+      if (innerOptional.has_value()) {
+        const auto &innerLoopDirective = innerOptional.value().value();
+        const auto &innerBegin =
+            std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
+        const auto &innerDirective =
+            std::get<parser::OmpLoopDirective>(innerBegin.t).v;
+
+        if (innerDirective == llvm::omp::Directive::OMPD_tile) {
+          // Get the size values from parse tree and convert to a vector
+          const auto &innerClauseList{
+              std::get<parser::OmpClauseList>(innerBegin.t)};
+          for (const auto &clause : innerClauseList.v)
+            if (const auto tclause{
+                    std::get_if<parser::OmpClause::Sizes>(&clause.u)}) {
+              sizesLengthValue = tclause->v.size();
+              found = true;
+            }
+        }
+      }
+    }
   }
 
   collapseValue = collapseValue - sizesLengthValue;
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 11641ba5e8606..eb71b7eee4200 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -167,6 +167,11 @@ bool collectLoopRelatedInfo(
     mlir::omp::LoopRelatedClauseOps &result,
     llvm::SmallVectorImpl<const semantics::Symbol *> &iv);
 
+void collectTileSizesFromOpenMPConstruct(
+    const parser::OpenMPConstruct *ompCons,
+    llvm::SmallVectorImpl<int64_t> &tileSizes,
+    Fortran::semantics::SemanticsContext &semaCtx);
+
 } // namespace omp
 } // namespace lower
 } // namespace Fortran
diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
index e1083b7ef2cd9..5bb1f3f36b65e 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h
@@ -497,10 +497,6 @@ bool ConstructDecompositionT<C, H>::applyClause(
     if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) {
       last.clauses.push_back(node);
       return true;
-    } else {
-      //      llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n";
-      last.clauses.push_back(node);
-      return true;
     }
   }
 

>From 63ff065fe20f067f1ba451dc4b3fd7d07de718a9 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Wed, 25 Jun 2025 10:33:33 -0400
Subject: [PATCH 19/25] Fix formatting

---
 flang/lib/Lower/OpenMP/OpenMP.cpp |  2 +-
 flang/lib/Lower/OpenMP/Utils.cpp  | 10 ++++------
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index b8aeba6b31fe7..4a111b5a69499 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1704,7 +1704,7 @@ genLoopNestClauses(lower::AbstractConverter &converter,
 
   llvm::SmallVector<int64_t> sizeValues;
   auto *ompCons{eval.getIf<parser::OpenMPConstruct>()};
-  collectTileSizesFromOpenMPConstruct (ompCons, sizeValues, semaCtx);
+  collectTileSizesFromOpenMPConstruct(ompCons, sizeValues, semaCtx);
   if (sizeValues.size() > 0)
     clauseOps.tileSizes = sizeValues;
 }
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 3459e1bedfc0c..cfe7fce6a9626 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -33,8 +33,7 @@
 using namespace Fortran::semantics;
 
 template <typename T>
-MaybeIntExpr
-EvaluateIntExpr(SemanticsContext &context, const T &expr) {
+MaybeIntExpr EvaluateIntExpr(SemanticsContext &context, const T &expr) {
   if (MaybeExpr maybeExpr{
           Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) {
     if (auto *intExpr{Fortran::evaluate::UnwrapExpr<SomeIntExpr>(*maybeExpr)}) {
@@ -45,8 +44,8 @@ EvaluateIntExpr(SemanticsContext &context, const T &expr) {
 }
 
 template <typename T>
-std::optional<std::int64_t>
-EvaluateInt64(SemanticsContext &context, const T &expr) {
+std::optional<std::int64_t> EvaluateInt64(SemanticsContext &context,
+                                          const T &expr) {
   return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr));
 }
 
@@ -640,8 +639,7 @@ static void convertLoopBounds(lower::AbstractConverter &converter,
 // Contains a loop construct with an inner tiling construct.
 void collectTileSizesFromOpenMPConstruct(
     const parser::OpenMPConstruct *ompCons,
-    llvm::SmallVectorImpl<int64_t> &tileSizes,
-    SemanticsContext &semaCtx) {
+    llvm::SmallVectorImpl<int64_t> &tileSizes, SemanticsContext &semaCtx) {
   if (!ompCons)
     return;
 

>From 9a8e1341b0c730921e4d2f2c4653e271b175d0e3 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Thu, 26 Jun 2025 10:03:13 -0400
Subject: [PATCH 20/25] Fix unparse and add a test for nested loop constructs.

---
 flang/test/Parser/OpenMP/do-tile-size.f90 | 29 +++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 flang/test/Parser/OpenMP/do-tile-size.f90

diff --git a/flang/test/Parser/OpenMP/do-tile-size.f90 b/flang/test/Parser/OpenMP/do-tile-size.f90
new file mode 100644
index 0000000000000..886ee4a2a680c
--- /dev/null
+++ b/flang/test/Parser/OpenMP/do-tile-size.f90
@@ -0,0 +1,29 @@
+! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+subroutine openmp_do_tiles(x)
+
+  integer, intent(inout)::x
+
+
+!CHECK: !$omp do
+!CHECK: !$omp tile sizes
+!$omp do
+!$omp  tile sizes(2)
+!CHECK: do
+  do x = 1, 100
+     call F1()
+!CHECK: end do
+  end do
+!CHECK: !$omp end tile
+!$omp end tile
+!$omp end do
+
+!PARSE-TREE:| | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+!PARSE-TREE:| | | OmpBeginLoopDirective
+!PARSE-TREE:| | | OpenMPLoopConstruct
+!PARSE-TREE:| | | | OmpBeginLoopDirective
+!PARSE-TREE:| | | | | OmpLoopDirective -> llvm::omp::Directive = tile
+!PARSE-TREE:| | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4'
+!PARSE-TREE: | | | | DoConstruct
+END subroutine openmp_do_tiles

>From 2a655bbf6fca0eaabd2a535b8e8847bb387d8b2f Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Thu, 26 Jun 2025 10:50:08 -0400
Subject: [PATCH 21/25] Use more convenient function to get
 OpenMPLoopConstruct. Fix comments.

---
 flang/lib/Semantics/canonicalize-omp.cpp      | 30 ++++++++-----------
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  9 +++---
 2 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index a7749b5a81678..79630b564e51a 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -152,23 +152,19 @@ class CanonicalizationOfOmp {
       // Keep track of the loops to handle the end loop directives
       llvm::SmallVector<parser::OpenMPLoopConstruct *> loops;
       loops.push_back(&x);
-      if (auto *innerConstruct{
-              GetConstructIf<parser::OpenMPConstruct>(*nextIt)}) {
-        if (auto *innerOmpLoop{
-                std::get_if<parser::OpenMPLoopConstruct>(&innerConstruct->u)}) {
-          auto &innerBeginDir{
-              std::get<parser::OmpBeginLoopDirective>(innerOmpLoop->t)};
-          auto &innerDir{std::get<parser::OmpLoopDirective>(innerBeginDir.t)};
-          if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
-            auto &innerLoop = std::get<std::optional<
-                common::Indirection<parser::OpenMPLoopConstruct>>>(
-                loops.back()->t);
-            innerLoop = std::move(*innerOmpLoop);
-            // Retrieveing the address so that DoConstruct or inner loop can be
-            // set later.
-            loops.push_back(&(innerLoop.value().value()));
-            nextIt = block.erase(nextIt);
-          }
+      if (auto *innerOmpLoop{GetOmpIf<parser::OpenMPLoopConstruct>(*nextIt)}) {
+        auto &innerBeginDir{
+            std::get<parser::OmpBeginLoopDirective>(innerOmpLoop->t)};
+        auto &innerDir{std::get<parser::OmpLoopDirective>(innerBeginDir.t)};
+        if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
+          auto &innerLoop = std::get<
+              std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
+              loops.back()->t);
+          innerLoop = std::move(*innerOmpLoop);
+          // Retrieveing the address so that DoConstruct or inner loop can be
+          // set later.
+          loops.push_back(&(innerLoop.value().value()));
+          nextIt = block.erase(nextIt);
         }
       }
 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 655645726d27c..2b6dce4593b2f 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3034,7 +3034,6 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
     loopInfos.push_back(*loopResult);
   }
 
-  //  llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP();
   llvm::OpenMPIRBuilder::InsertPointTy afterIP =
       loopInfos.front()->getAfterIP();
 
@@ -3055,10 +3054,10 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
     std::vector<llvm::CanonicalLoopInfo *> NewLoops =
         ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes);
 
-    // Collapse loops. Store the insertion point because LoopInfos may get
-    // invalidated.
-    auto AfterBB = NewLoops.front()->getAfter();
-    auto AfterAfterBB = AfterBB->getSingleSuccessor();
+    // Update afterIP to get the correct insertion point after
+    // tiling.
+    llvm::BasicBlock *AfterBB = NewLoops.front()->getAfter();
+    llvm::BasicBlock  *AfterAfterBB = AfterBB->getSingleSuccessor();
     afterIP = {AfterAfterBB, AfterAfterBB->begin()};
     NewTopLoopInfo = NewLoops[0];
 

>From 90e4b8b2bc37a8aca58c6de5290b41bf2b62ca9a Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Thu, 26 Jun 2025 10:54:46 -0400
Subject: [PATCH 22/25] Fix formatting.

---
 .../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 2b6dce4593b2f..62a4dfd173edf 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3057,7 +3057,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
     // Update afterIP to get the correct insertion point after
     // tiling.
     llvm::BasicBlock *AfterBB = NewLoops.front()->getAfter();
-    llvm::BasicBlock  *AfterAfterBB = AfterBB->getSingleSuccessor();
+    llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor();
     afterIP = {AfterAfterBB, AfterAfterBB->begin()};
     NewTopLoopInfo = NewLoops[0];
 

>From 5536a2c8766c3845cb86c4211a55592ffc0cadf0 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sat, 9 Aug 2025 10:53:23 -0400
Subject: [PATCH 23/25] Fix merge problems related to the different
 representations used for nested loop constructs.

---
 flang/lib/Lower/OpenMP/OpenMP.cpp          | 52 +++++-----------------
 flang/lib/Lower/OpenMP/Utils.cpp           | 28 +++++++-----
 flang/lib/Semantics/canonicalize-omp.cpp   | 10 +++--
 flang/lib/Semantics/resolve-directives.cpp | 34 ++++++++------
 4 files changed, 56 insertions(+), 68 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 4a111b5a69499..743cef25ead6c 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -421,14 +421,19 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
 
               // FIXME(JAN): For now we check if there is an inner
               // OpenMPLoopConstruct, and extract the size clause from there
-              const auto &innerOptional = std::get<std::optional<
-                  common::Indirection<parser::OpenMPLoopConstruct>>>(
-                  ompConstruct.t);
-              if (innerOptional.has_value()) {
-                const auto &innerLoopDirective = innerOptional.value().value();
+              const auto &nestedOptional =
+                  std::get<std::optional<parser::NestedConstruct>>(
+                      ompConstruct.t);
+              assert(nestedOptional.has_value() &&
+                     "Expected a DoConstruct or OpenMPLoopConstruct");
+              const auto *innerConstruct =
+                  std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+                      &(nestedOptional.value()));
+              if (innerConstruct) {
+                const auto &innerLoopConstruct = innerConstruct->value();
                 const auto &innerBegin =
                     std::get<parser::OmpBeginLoopDirective>(
-                        innerLoopDirective.t);
+                        innerLoopConstruct.t);
                 const auto &innerDirective =
                     std::get<parser::OmpLoopDirective>(innerBegin.t);
                 if (innerDirective.v == llvm::omp::Directive::OMPD_tile) {
@@ -2276,41 +2281,6 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
   // Apply unrolling to it
   auto cli = canonLoop.getCli();
   mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli);
-
-static mlir::omp::LoopOp genTiledLoopOp(lower::AbstractConverter &converter,
-                                        lower::SymMap &symTable,
-                                        semantics::SemanticsContext &semaCtx,
-                                        lower::pft::Evaluation &eval,
-                                        mlir::Location loc,
-                                        const ConstructQueue &queue,
-                                        ConstructQueue::const_iterator item) {
-  mlir::omp::LoopOperands loopClauseOps;
-  llvm::SmallVector<const semantics::Symbol *> loopReductionSyms;
-  genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps,
-                 loopReductionSyms);
-
-  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
-                           /*shouldCollectPreDeterminedSymbols=*/true,
-                           /*useDelayedPrivatization=*/true, symTable);
-  dsp.processStep1(&loopClauseOps);
-
-  mlir::omp::LoopNestOperands loopNestClauseOps;
-  llvm::SmallVector<const semantics::Symbol *> iv;
-  genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
-                     loopNestClauseOps, iv);
-
-  EntryBlockArgs loopArgs;
-  loopArgs.priv.syms = dsp.getDelayedPrivSymbols();
-  loopArgs.priv.vars = loopClauseOps.privateVars;
-  loopArgs.reduction.syms = loopReductionSyms;
-  loopArgs.reduction.vars = loopClauseOps.reductionVars;
-
-  auto loopOp =
-      genWrapperOp<mlir::omp::LoopOp>(converter, loc, loopClauseOps, loopArgs);
-  genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
-                loopNestClauseOps, iv, {{loopOp, loopArgs}},
-                llvm::omp::Directive::OMPD_loop, dsp);
-  return loopOp;
 }
 
 static mlir::omp::MaskedOp
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index cfe7fce6a9626..2cbc304e2e2a9 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -644,11 +644,15 @@ void collectTileSizesFromOpenMPConstruct(
     return;
 
   if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(&ompCons->u)}) {
-    const auto &innerOptional = std::get<
-        std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
-        ompLoop->t);
-    if (innerOptional.has_value()) {
-      const auto &innerLoopDirective = innerOptional.value().value();
+    const auto &nestedOptional =
+        std::get<std::optional<parser::NestedConstruct>>(ompLoop->t);
+    assert(nestedOptional.has_value() &&
+           "Expected a DoConstruct or OpenMPLoopConstruct");
+    const auto *innerConstruct =
+        std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+            &(nestedOptional.value()));
+    if (innerConstruct) {
+      const auto &innerLoopDirective = innerConstruct->value();
       const auto &innerBegin =
           std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
       const auto &innerDirective =
@@ -697,11 +701,15 @@ bool collectLoopRelatedInfo(
   std::int64_t sizesLengthValue = 0l;
   if (auto *ompCons{eval.getIf<parser::OpenMPConstruct>()}) {
     if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(&ompCons->u)}) {
-      const auto &innerOptional = std::get<
-          std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
-          ompLoop->t);
-      if (innerOptional.has_value()) {
-        const auto &innerLoopDirective = innerOptional.value().value();
+      const auto &nestedOptional =
+          std::get<std::optional<parser::NestedConstruct>>(ompLoop->t);
+      assert(nestedOptional.has_value() &&
+             "Expected a DoConstruct or OpenMPLoopConstruct");
+      const auto *innerConstruct =
+          std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+              &(nestedOptional.value()));
+      if (innerConstruct) {
+        const auto &innerLoopDirective = innerConstruct->value();
         const auto &innerBegin =
             std::get<parser::OmpBeginLoopDirective>(innerLoopDirective.t);
         const auto &innerDirective =
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index 79630b564e51a..4792bf2cb217c 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -157,13 +157,15 @@ class CanonicalizationOfOmp {
             std::get<parser::OmpBeginLoopDirective>(innerOmpLoop->t)};
         auto &innerDir{std::get<parser::OmpLoopDirective>(innerBeginDir.t)};
         if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
-          auto &innerLoop = std::get<
-              std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
-              loops.back()->t);
+          auto &innerLoopVariant =
+              std::get<std::optional<parser::NestedConstruct>>(loops.back()->t);
+          auto &innerLoop =
+              std::get<common::Indirection<parser::OpenMPLoopConstruct>>(
+                  innerLoopVariant.value());
           innerLoop = std::move(*innerOmpLoop);
           // Retrieveing the address so that DoConstruct or inner loop can be
           // set later.
-          loops.push_back(&(innerLoop.value().value()));
+          loops.push_back(&(innerLoop.value()));
           nextIt = block.erase(nextIt);
         }
       }
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 22b5cf8941f44..682233fbef564 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -2057,12 +2057,18 @@ void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct(
     const parser::OpenMPLoopConstruct &x,
     llvm::SmallVector<std::int64_t> &levels,
     llvm::SmallVector<const parser::OmpClause *> &clauses) {
-  const auto &innerOptional =
-      std::get<std::optional<common::Indirection<parser::OpenMPLoopConstruct>>>(
-          x.t);
-  if (innerOptional.has_value()) {
+
+  const auto &nestedOptional =
+      std::get<std::optional<parser::NestedConstruct>>(x.t);
+  assert(nestedOptional.has_value() &&
+      "Expected a DoConstruct or OpenMPLoopConstruct");
+  const auto *innerConstruct =
+      std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+          &(nestedOptional.value()));
+
+  if (innerConstruct) {
     CollectAssociatedLoopLevelsFromLoopConstruct(
-        innerOptional.value().value(), levels, clauses);
+        innerConstruct->value(), levels, clauses);
   }
 }
 
@@ -2127,17 +2133,19 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
   bool hasCollapseClause{
       clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false};
   const parser::OpenMPLoopConstruct *innerMostLoop = &x;
-
+  const parser::NestedConstruct *innerMostNest = nullptr;
   while (auto &optLoopCons{
-      std::get<std::optional<parser::NestedConstruct>>(x.t)}) {
-    if (const auto &innerLoop{
-            std::get_if < parser::OpenMPLoopConstruct >>> (innerMostLoop->t)}) {
-      innerMostLoop = &innerLoop.value().value();
+      std::get<std::optional<parser::NestedConstruct>>(innerMostLoop->t)}) {
+    innerMostNest = &(optLoopCons.value());
+    if (const auto *innerLoop{
+            std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+                innerMostNest)}) {
+      innerMostLoop = &(innerLoop->value());
     }
   }
 
-  if (optLoopCons.has_value()) {
-    if (const auto &outer{std::get_if<parser::DoConstruct>(innerMostLoop->t)}) {
+  if (innerMostNest) {
+    if (const auto &outer{std::get_if<parser::DoConstruct>(innerMostNest)}) {
       for (const parser::DoConstruct *loop{&*outer}; loop && level > 0;
           --level) {
         if (loop->IsDoConcurrent()) {
@@ -2173,7 +2181,7 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
       CheckAssocLoopLevel(level, GetAssociatedClause());
     } else if (const auto &loop{std::get_if<
                    common::Indirection<parser::OpenMPLoopConstruct>>(
-                   &*optLoopCons)}) {
+                   innerMostNest)}) {
       auto &beginDirective =
           std::get<parser::OmpBeginLoopDirective>(loop->value().t);
       auto &beginLoopDirective =

>From 67f025a2667cee98febc679cb12e5440890ea253 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Sat, 9 Aug 2025 12:27:35 -0400
Subject: [PATCH 24/25] Fix bugs introduced when merging.

---
 flang/lib/Lower/OpenMP/OpenMP.cpp             |  4 ++--
 flang/lib/Semantics/canonicalize-omp.cpp      | 19 +++++++++---------
 flang/lib/Semantics/resolve-directives.cpp    |  3 ++-
 ...nested-loop-transformation-construct01.f90 | 20 -------------------
 flang/test/Lower/OpenMP/wsloop-tile.f90       |  2 +-
 5 files changed, 15 insertions(+), 33 deletions(-)
 delete mode 100644 flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 743cef25ead6c..4a9b28a0f018c 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -419,8 +419,8 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
               beginClauseList =
                   &std::get<parser::OmpClauseList>(beginDirective.t);
 
-              // FIXME(JAN): For now we check if there is an inner
-              // OpenMPLoopConstruct, and extract the size clause from there
+              // For now we check if there is an inner OpenMPLoopConstruct, and
+              // extract the size clause from there
               const auto &nestedOptional =
                   std::get<std::optional<parser::NestedConstruct>>(
                       ompConstruct.t);
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index 4792bf2cb217c..c664171350d9e 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -143,7 +143,6 @@ class CanonicalizationOfOmp {
           "If a loop construct has been fully unrolled, it cannot then be tiled"_err_en_US,
           parser::ToUpperCaseLetters(dir.source.ToString()));
     };
-
     nextIt = it;
     while (++nextIt != block.end()) {
       // Ignore compiler directives.
@@ -159,14 +158,16 @@ class CanonicalizationOfOmp {
         if (innerDir.v == llvm::omp::Directive::OMPD_tile) {
           auto &innerLoopVariant =
               std::get<std::optional<parser::NestedConstruct>>(loops.back()->t);
-          auto &innerLoop =
-              std::get<common::Indirection<parser::OpenMPLoopConstruct>>(
-                  innerLoopVariant.value());
-          innerLoop = std::move(*innerOmpLoop);
-          // Retrieveing the address so that DoConstruct or inner loop can be
-          // set later.
-          loops.push_back(&(innerLoop.value()));
-          nextIt = block.erase(nextIt);
+          if (innerLoopVariant.has_value()) {
+            auto *innerLoop =
+                std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+                    &(innerLoopVariant.value()));
+            *innerLoop = std::move(*innerOmpLoop);
+            // Retrieveing the address so that DoConstruct or inner loop can be
+            // set later.
+            loops.push_back(&(innerLoop->value()));
+            nextIt = block.erase(nextIt);
+          }
         }
       }
 
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 682233fbef564..a95b5d5d15293 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -2141,7 +2141,8 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
             std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
                 innerMostNest)}) {
       innerMostLoop = &(innerLoop->value());
-    }
+    } else
+      break;
   }
 
   if (innerMostNest) {
diff --git a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90
deleted file mode 100644
index 17eba93a7405d..0000000000000
--- a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90
+++ /dev/null
@@ -1,20 +0,0 @@
-! Test to ensure TODO message is emitted for tile OpenMP 5.1 Directives when they are nested.
-
-!RUN: not %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s
-
-subroutine loop_transformation_construct
-  implicit none
-  integer :: I = 10
-  integer :: x
-  integer :: y(I)
-
-  !$omp do
-  !$omp tile
-  do i = 1, I
-    y(i) = y(i) * 5
-  end do
-  !$omp end tile
-  !$omp end do
-end subroutine
-
-!CHECK: not yet implemented: Unhandled loop directive (tile)
diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90
index c9bf18e3b278d..4c412b357f52e 100644
--- a/flang/test/Lower/OpenMP/wsloop-tile.f90
+++ b/flang/test/Lower/OpenMP/wsloop-tile.f90
@@ -2,7 +2,7 @@
 
 ! RUN: bbc -fopenmp -fopenmp-version=51 -emit-hlfir %s -o - | FileCheck %s
 
-!CHECK-LABEL:   func.func @_QQmain() attributes {fir.bindc_name = "wsloop_tile"} {
+!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "WSLOOP_TILE"} {
 program wsloop_tile
   integer :: i, j, k
   integer :: a, b, c

>From fabca26060ae11099586a079bf1258a5361bd9f1 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin at yahoo.com>
Date: Mon, 11 Aug 2025 07:23:15 -0400
Subject: [PATCH 25/25] Move include

---
 flang/lib/Lower/OpenMP/Utils.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 2cbc304e2e2a9..e3afb3c56fcd2 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -13,8 +13,8 @@
 #include "Utils.h"
 
 #include "ClauseFinder.h"
-#include "flang/Lower/OpenMP/Clauses.h"
 #include "flang/Evaluate/fold.h"
+#include "flang/Lower/OpenMP/Clauses.h"
 #include <flang/Lower/AbstractConverter.h>
 #include <flang/Lower/ConvertType.h>
 #include <flang/Lower/DirectivesCommon.h>



More information about the flang-commits mailing list