[flang] [llvm] [mlir] [openmp] [MLIR][OpenMP] Add omp.fuse operation (PR #168898)

Thu Nov 20 08:20:56 PST 2025

https://github.com/NouTimbaler created https://github.com/llvm/llvm-project/pull/168898

This patch is a follow-up to #161213 and adds the `omp.fuse` loop transformation operation to the OpenMP dialect. The operation is used to lower the `!$omp fuse` directive in Flang.
It also adds lowering and end-to-end tests.

>From 42a5057c156f51b5476a5f29f44f42169bb50913 Mon Sep 17 00:00:00 2001
From: Ferran Toda <ferran.todacasaban at bsc.es>
Date: Thu, 20 Nov 2025 14:32:46 +0000
Subject: [PATCH 1/2] Semantics fuse rebase

---
 flang/include/flang/Parser/openmp-utils.h     |   3 +
 .../flang/Semantics/openmp-directive-sets.h   |   7 +
 flang/lib/Lower/OpenMP/OpenMP.cpp             |  41 ++--
 flang/lib/Parser/openmp-parsers.cpp           |   1 +
 flang/lib/Parser/openmp-utils.cpp             |  17 ++
 flang/lib/Semantics/canonicalize-omp.cpp      | 117 ++++++----
 flang/lib/Semantics/check-omp-loop.cpp        | 139 +++++++++---
 flang/lib/Semantics/check-omp-structure.cpp   |   8 +-
 flang/lib/Semantics/check-omp-structure.h     |   2 +
 flang/lib/Semantics/resolve-directives.cpp    | 210 +++++++++---------
 flang/lib/Semantics/rewrite-parse-tree.cpp    |  39 ++--
 flang/test/Parser/OpenMP/fail-looprange.f90   |  11 +
 flang/test/Parser/OpenMP/fuse-looprange.f90   |  38 ++++
 flang/test/Parser/OpenMP/fuse01.f90           |  28 +++
 flang/test/Parser/OpenMP/fuse02.f90           |  97 ++++++++
 .../loop-transformation-construct04.f90       |  80 +++++++
 .../loop-transformation-construct05.f90       |  90 ++++++++
 .../OpenMP/loop-transformation-clauses01.f90  |  66 ++++++
 .../loop-transformation-construct01.f90       |   4 +-
 .../loop-transformation-construct02.f90       |  93 ++++++++
 .../loop-transformation-construct03.f90       |  39 ++++
 .../loop-transformation-construct04.f90       |  47 ++++
 flang/test/Semantics/OpenMP/tile02.f90        |   2 +-
 23 files changed, 963 insertions(+), 216 deletions(-)
 create mode 100644 flang/test/Parser/OpenMP/fail-looprange.f90
 create mode 100644 flang/test/Parser/OpenMP/fuse-looprange.f90
 create mode 100644 flang/test/Parser/OpenMP/fuse01.f90
 create mode 100644 flang/test/Parser/OpenMP/fuse02.f90
 create mode 100644 flang/test/Parser/OpenMP/loop-transformation-construct04.f90
 create mode 100644 flang/test/Parser/OpenMP/loop-transformation-construct05.f90
 create mode 100644 flang/test/Semantics/OpenMP/loop-transformation-clauses01.f90
 create mode 100644 flang/test/Semantics/OpenMP/loop-transformation-construct02.f90
 create mode 100644 flang/test/Semantics/OpenMP/loop-transformation-construct03.f90
 create mode 100644 flang/test/Semantics/OpenMP/loop-transformation-construct04.f90

diff --git a/flang/include/flang/Parser/openmp-utils.h b/flang/include/flang/Parser/openmp-utils.h
index 36556f8dd7f4a..7396e57144b90 100644
--- a/flang/include/flang/Parser/openmp-utils.h
+++ b/flang/include/flang/Parser/openmp-utils.h
@@ -123,6 +123,9 @@ template <typename T> OmpDirectiveName GetOmpDirectiveName(const T &x) {
 const OpenMPDeclarativeConstruct *GetOmp(const DeclarationConstruct &x);
 const OpenMPConstruct *GetOmp(const ExecutionPartConstruct &x);
 
+const OpenMPLoopConstruct *GetOmpLoop(const ExecutionPartConstruct &x);
+const DoConstruct *GetDoConstruct(const ExecutionPartConstruct &x);
+
 const OmpObjectList *GetOmpObjectList(const OmpClause &clause);
 
 template <typename T>
diff --git a/flang/include/flang/Semantics/openmp-directive-sets.h b/flang/include/flang/Semantics/openmp-directive-sets.h
index 01e8481e05721..609a7be700c28 100644
--- a/flang/include/flang/Semantics/openmp-directive-sets.h
+++ b/flang/include/flang/Semantics/openmp-directive-sets.h
@@ -275,10 +275,17 @@ static const OmpDirectiveSet loopConstructSet{
     Directive::OMPD_teams_distribute_parallel_do_simd,
     Directive::OMPD_teams_distribute_simd,
     Directive::OMPD_teams_loop,
+    Directive::OMPD_fuse,
     Directive::OMPD_tile,
     Directive::OMPD_unroll,
 };
 
+static const OmpDirectiveSet loopTransformationSet{
+    Directive::OMPD_tile,
+    Directive::OMPD_unroll,
+    Directive::OMPD_fuse,
+};
+
 static const OmpDirectiveSet nonPartialVarSet{
     Directive::OMPD_allocate,
     Directive::OMPD_allocators,
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index c6487349c4056..b6efa8592c678 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3507,6 +3507,13 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
   case llvm::omp::Directive::OMPD_tile:
     genTileOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
     break;
+  case llvm::omp::Directive::OMPD_fuse: {
+    unsigned version = semaCtx.langOptions().OpenMPVersion;
+    if (!semaCtx.langOptions().OpenMPSimd)
+      TODO(loc, "Unhandled loop directive (" +
+                    llvm::omp::getOpenMPDirectiveName(dir, version) + ")");
+    break;
+  }
   case llvm::omp::Directive::OMPD_unroll:
     genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
     break;
@@ -3962,22 +3969,24 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
 
   mlir::Location currentLocation = converter.genLocation(beginSpec.source);
 
-  if (const parser::OpenMPLoopConstruct *ompNestedLoopCons =
-          loopConstruct.GetNestedConstruct()) {
-    llvm::omp::Directive nestedDirective =
-        parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v;
-    switch (nestedDirective) {
-    case llvm::omp::Directive::OMPD_tile:
-      // Skip OMPD_tile since the tile sizes will be retrieved when
-      // generating the omp.loop_nest op.
-      break;
-    default: {
-      unsigned version = semaCtx.langOptions().OpenMPVersion;
-      TODO(currentLocation,
-           "Applying a loop-associated on the loop generated by the " +
-               llvm::omp::getOpenMPDirectiveName(nestedDirective, version) +
-               " construct");
-    }
+  for (auto &construct : std::get<parser::Block>(loopConstruct.t)) {
+    if (const parser::OpenMPLoopConstruct *ompNestedLoopCons =
+            parser::omp::GetOmpLoop(construct)) {
+      llvm::omp::Directive nestedDirective =
+          parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v;
+      switch (nestedDirective) {
+      case llvm::omp::Directive::OMPD_tile:
+        // Skip OMPD_tile since the tile sizes will be retrieved when
+        // generating the omp.loop_nest op.
+        break;
+      default: {
+        unsigned version = semaCtx.langOptions().OpenMPVersion;
+        TODO(currentLocation,
+             "Applying a loop-associated on the loop generated by the " +
+                 llvm::omp::getOpenMPDirectiveName(nestedDirective, version) +
+                 " construct");
+      }
+      }
     }
   }
 
diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp
index e2da60ed19de8..231eea8841d4b 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -2260,6 +2260,7 @@ static constexpr DirectiveSet GetLoopDirectives() {
       unsigned(Directive::OMPD_teams_distribute_parallel_do_simd),
       unsigned(Directive::OMPD_teams_distribute_simd),
       unsigned(Directive::OMPD_teams_loop),
+      unsigned(Directive::OMPD_fuse),
       unsigned(Directive::OMPD_tile),
       unsigned(Directive::OMPD_unroll),
   };
diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp
index 2424828293c73..dfe8dbdd5ac9e 100644
--- a/flang/lib/Parser/openmp-utils.cpp
+++ b/flang/lib/Parser/openmp-utils.cpp
@@ -41,6 +41,23 @@ const OpenMPConstruct *GetOmp(const ExecutionPartConstruct &x) {
   return nullptr;
 }
 
+const OpenMPLoopConstruct *GetOmpLoop(const ExecutionPartConstruct &x) {
+  if (auto *construct{GetOmp(x)}) {
+    if (auto *omp{std::get_if<OpenMPLoopConstruct>(&construct->u)}) {
+      return omp;
+    }
+  }
+  return nullptr;
+}
+const DoConstruct *GetDoConstruct(const ExecutionPartConstruct &x) {
+  if (auto *y{std::get_if<ExecutableConstruct>(&x.u)}) {
+    if (auto *z{std::get_if<common::Indirection<DoConstruct>>(&y->u)}) {
+      return &z->value();
+    }
+  }
+  return nullptr;
+}
+
 const OmpObjectList *GetOmpObjectList(const OmpClause &clause) {
   // Clauses with OmpObjectList as its data member
   using MemberObjectListClauses = std::tuple<OmpClause::Copyin,
diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp
index 0cec1969e0978..f7c53d6d8f4c4 100644
--- a/flang/lib/Semantics/canonicalize-omp.cpp
+++ b/flang/lib/Semantics/canonicalize-omp.cpp
@@ -9,6 +9,7 @@
 #include "canonicalize-omp.h"
 #include "flang/Parser/parse-tree-visitor.h"
 #include "flang/Parser/parse-tree.h"
+#include "flang/Semantics/openmp-directive-sets.h"
 #include "flang/Semantics/semantics.h"
 
 // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP
@@ -136,20 +137,30 @@ class CanonicalizationOfOmp {
           "A DO loop must follow the %s directive"_err_en_US,
           parser::ToUpperCaseLetters(dirName.source.ToString()));
     };
-    auto tileUnrollError = [](const parser::OmpDirectiveName &dirName,
-                               parser::Messages &messages) {
+    auto transformUnrollError = [](const parser::OmpDirectiveName &dirName,
+                                    parser::Messages &messages) {
       messages.Say(dirName.source,
-          "If a loop construct has been fully unrolled, it cannot then be tiled"_err_en_US,
+          "If a loop construct has been fully unrolled, it cannot then be further transformed"_err_en_US,
           parser::ToUpperCaseLetters(dirName.source.ToString()));
     };
+    auto missingEndFuse = [](auto &dir, auto &messages) {
+      messages.Say(dir.source,
+          "The %s construct requires the END FUSE directive"_err_en_US,
+          parser::ToUpperCaseLetters(dir.source.ToString()));
+    };
+
+    bool endFuseNeeded = beginName.v == llvm::omp::Directive::OMPD_fuse;
 
     auto &body{std::get<parser::Block>(x.t)};
 
     nextIt = it;
-    while (++nextIt != block.end()) {
+    nextIt++;
+    while (nextIt != block.end()) {
       // Ignore compiler directives.
-      if (GetConstructIf<parser::CompilerDirective>(*nextIt))
+      if (GetConstructIf<parser::CompilerDirective>(*nextIt)) {
+        nextIt++;
         continue;
+      }
 
       if (auto *doCons{GetConstructIf<parser::DoConstruct>(*nextIt)}) {
         if (doCons->GetLoopControl()) {
@@ -160,9 +171,12 @@ class CanonicalizationOfOmp {
           if (nextIt != block.end()) {
             if (auto *endDir{
                     GetConstructIf<parser::OmpEndLoopDirective>(*nextIt)}) {
-              std::get<std::optional<parser::OmpEndLoopDirective>>(x.t) =
-                  std::move(*endDir);
-              nextIt = block.erase(nextIt);
+              auto &endDirName = endDir->DirName();
+              if (endDirName.v != llvm::omp::Directive::OMPD_fuse) {
+                std::get<std::optional<parser::OmpEndLoopDirective>>(x.t) =
+                    std::move(*endDir);
+                nextIt = block.erase(nextIt);
+              }
             }
           }
         } else {
@@ -172,50 +186,45 @@ class CanonicalizationOfOmp {
         }
       } else if (auto *ompLoopCons{
                      GetOmpIf<parser::OpenMPLoopConstruct>(*nextIt)}) {
-        // We should allow UNROLL and TILE constructs to be inserted between an
-        // OpenMP Loop Construct and the DO loop itself
+        // We should allow loop transformation constructs to be inserted between
+        // an OpenMP Loop Construct and the DO loop itself
         auto &nestedBeginDirective = ompLoopCons->BeginDir();
         auto &nestedBeginName = nestedBeginDirective.DirName();
-        if ((nestedBeginName.v == llvm::omp::Directive::OMPD_unroll ||
-                nestedBeginName.v == llvm::omp::Directive::OMPD_tile) &&
-            !(nestedBeginName.v == llvm::omp::Directive::OMPD_unroll &&
-                beginName.v == llvm::omp::Directive::OMPD_tile)) {
-          // iterate through the remaining block items to find the end directive
-          // for the unroll/tile directive.
-          parser::Block::iterator endIt;
-          endIt = nextIt;
-          while (endIt != block.end()) {
-            if (auto *endDir{
-                    GetConstructIf<parser::OmpEndLoopDirective>(*endIt)}) {
-              auto &endDirName = endDir->DirName();
-              if (endDirName.v == beginName.v) {
-                std::get<std::optional<parser::OmpEndLoopDirective>>(x.t) =
-                    std::move(*endDir);
-                endIt = block.erase(endIt);
-                continue;
+        if (llvm::omp::loopTransformationSet.test(nestedBeginName.v)) {
+          if (nestedBeginName.v == llvm::omp::Directive::OMPD_unroll &&
+              llvm::omp::loopTransformationSet.test(beginName.v)) {
+            // if a loop has been unrolled, the user can not then transform that
+            // loop as it has been unrolled
+            const parser::OmpClauseList &unrollClauseList{
+                nestedBeginDirective.Clauses()};
+            if (unrollClauseList.v.empty()) {
+              // if the clause list is empty for an unroll construct, we assume
+              // the loop is being fully unrolled
+              transformUnrollError(beginName, messages_);
+            } else {
+              // parse the clauses for the unroll directive to find the full
+              // clause
+              for (auto &clause : unrollClauseList.v) {
+                if (clause.Id() == llvm::omp::OMPC_full) {
+                  transformUnrollError(beginName, messages_);
+                }
               }
             }
-            ++endIt;
           }
           RewriteOpenMPLoopConstruct(*ompLoopCons, block, nextIt);
           body.push_back(std::move(*nextIt));
           nextIt = block.erase(nextIt);
-        } else if (nestedBeginName.v == llvm::omp::Directive::OMPD_unroll &&
-            beginName.v == llvm::omp::Directive::OMPD_tile) {
-          // if a loop has been unrolled, the user can not then tile that loop
-          // as it has been unrolled
-          const parser::OmpClauseList &unrollClauseList{
-              nestedBeginDirective.Clauses()};
-          if (unrollClauseList.v.empty()) {
-            // if the clause list is empty for an unroll construct, we assume
-            // the loop is being fully unrolled
-            tileUnrollError(beginName, messages_);
-          } else {
-            // parse the clauses for the unroll directive to find the full
-            // clause
-            for (auto &clause : unrollClauseList.v) {
-              if (clause.Id() == llvm::omp::OMPC_full) {
-                tileUnrollError(beginName, messages_);
+          // check the following block item to find the end directive
+          // for the loop transform directive.
+          if (nextIt != block.end()) {
+            if (auto *endDir{
+                    GetConstructIf<parser::OmpEndLoopDirective>(*nextIt)}) {
+              auto &endDirName = endDir->DirName();
+              if (endDirName.v == beginName.v &&
+                  endDirName.v != llvm::omp::Directive::OMPD_fuse) {
+                std::get<std::optional<parser::OmpEndLoopDirective>>(x.t) =
+                    std::move(*endDir);
+                nextIt = block.erase(nextIt);
               }
             }
           }
@@ -227,11 +236,29 @@ class CanonicalizationOfOmp {
       } else {
         missingDoConstruct(beginName, messages_);
       }
+
+      if (endFuseNeeded && nextIt != block.end()) {
+        if (auto *endDir{
+                GetConstructIf<parser::OmpEndLoopDirective>(*nextIt)}) {
+          auto &endDirName = endDir->DirName();
+          if (endDirName.v == llvm::omp::Directive::OMPD_fuse) {
+            endFuseNeeded = false;
+            std::get<std::optional<parser::OmpEndLoopDirective>>(x.t) =
+                std::move(*endDir);
+            nextIt = block.erase(nextIt);
+          }
+        }
+      }
+      if (endFuseNeeded)
+        continue;
       // If we get here, we either found a loop, or issued an error message.
       return;
     }
     if (nextIt == block.end()) {
-      missingDoConstruct(beginName, messages_);
+      if (endFuseNeeded)
+        missingEndFuse(beginName, messages_);
+      else
+        missingDoConstruct(beginName, messages_);
     }
   }
 
diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp
index 3d3596b500880..13581008433a6 100644
--- a/flang/lib/Semantics/check-omp-loop.cpp
+++ b/flang/lib/Semantics/check-omp-loop.cpp
@@ -285,9 +285,11 @@ void OmpStructureChecker::Enter(const parser::OpenMPLoopConstruct &x) {
   }
   SetLoopInfo(x);
 
-  if (const auto *doConstruct{x.GetNestedLoop()}) {
-    const auto &doBlock{std::get<parser::Block>(doConstruct->t)};
-    CheckNoBranching(doBlock, beginName.v, beginName.source);
+  for (auto &construct : std::get<parser::Block>(x.t)) {
+    if (const auto *doConstruct{parser::omp::GetDoConstruct(construct)}) {
+      const auto &doBlock{std::get<parser::Block>(doConstruct->t)};
+      CheckNoBranching(doBlock, beginName.v, beginName.source);
+    }
   }
   CheckLoopItrVariableIsInt(x);
   CheckAssociatedLoopConstraints(x);
@@ -301,6 +303,11 @@ void OmpStructureChecker::Enter(const parser::OpenMPLoopConstruct &x) {
       beginName.v == llvm::omp::Directive::OMPD_distribute_simd) {
     CheckDistLinear(x);
   }
+  if (beginName.v == llvm::omp::Directive::OMPD_fuse) {
+    CheckLooprangeBounds(x);
+  } else {
+    CheckNestedFuse(x);
+  }
 }
 
 const parser::Name OmpStructureChecker::GetLoopIndex(
@@ -320,24 +327,28 @@ void OmpStructureChecker::SetLoopInfo(const parser::OpenMPLoopConstruct &x) {
 
 void OmpStructureChecker::CheckLoopItrVariableIsInt(
     const parser::OpenMPLoopConstruct &x) {
-  for (const parser::DoConstruct *loop{x.GetNestedLoop()}; loop;) {
-    if (loop->IsDoNormal()) {
-      const parser::Name &itrVal{GetLoopIndex(loop)};
-      if (itrVal.symbol) {
-        const auto *type{itrVal.symbol->GetType()};
-        if (!type->IsNumeric(TypeCategory::Integer)) {
-          context_.Say(itrVal.source,
-              "The DO loop iteration"
-              " variable must be of the type integer."_err_en_US,
-              itrVal.ToString());
+  for (auto &construct : std::get<parser::Block>(x.t)) {
+    for (const parser::DoConstruct *loop{
+             parser::omp::GetDoConstruct(construct)};
+        loop;) {
+      if (loop->IsDoNormal()) {
+        const parser::Name &itrVal{GetLoopIndex(loop)};
+        if (itrVal.symbol) {
+          const auto *type{itrVal.symbol->GetType()};
+          if (!type->IsNumeric(TypeCategory::Integer)) {
+            context_.Say(itrVal.source,
+                "The DO loop iteration"
+                " variable must be of the type integer."_err_en_US,
+                itrVal.ToString());
+          }
         }
       }
+      // Get the next DoConstruct if block is not empty.
+      const auto &block{std::get<parser::Block>(loop->t)};
+      const auto it{block.begin()};
+      loop = it != block.end() ? parser::Unwrap<parser::DoConstruct>(*it)
+                               : nullptr;
     }
-    // Get the next DoConstruct if block is not empty.
-    const auto &block{std::get<parser::Block>(loop->t)};
-    const auto it{block.begin()};
-    loop =
-        it != block.end() ? parser::Unwrap<parser::DoConstruct>(*it) : nullptr;
   }
 }
 
@@ -401,23 +412,28 @@ void OmpStructureChecker::CheckDistLinear(
 
     // Match the loop index variables with the collected symbols from linear
     // clauses.
-    for (const parser::DoConstruct *loop{x.GetNestedLoop()}; loop;) {
-      if (loop->IsDoNormal()) {
-        const parser::Name &itrVal{GetLoopIndex(loop)};
-        if (itrVal.symbol) {
-          // Remove the symbol from the collected set
-          indexVars.erase(&itrVal.symbol->GetUltimate());
-        }
-        collapseVal--;
-        if (collapseVal == 0) {
-          break;
+    for (auto &construct : std::get<parser::Block>(x.t)) {
+      std::int64_t curCollapseVal{collapseVal};
+      for (const parser::DoConstruct *loop{
+               parser::omp::GetDoConstruct(construct)};
+          loop;) {
+        if (loop->IsDoNormal()) {
+          const parser::Name &itrVal{GetLoopIndex(loop)};
+          if (itrVal.symbol) {
+            // Remove the symbol from the collected set
+            indexVars.erase(&itrVal.symbol->GetUltimate());
+          }
+          curCollapseVal--;
+          if (curCollapseVal == 0) {
+            break;
+          }
         }
+        // Get the next DoConstruct if block is not empty.
+        const auto &block{std::get<parser::Block>(loop->t)};
+        const auto it{block.begin()};
+        loop = it != block.end() ? parser::Unwrap<parser::DoConstruct>(*it)
+                                 : nullptr;
       }
-      // Get the next DoConstruct if block is not empty.
-      const auto &block{std::get<parser::Block>(loop->t)};
-      const auto it{block.begin()};
-      loop = it != block.end() ? parser::Unwrap<parser::DoConstruct>(*it)
-                               : nullptr;
     }
 
     // Show error for the remaining variables
@@ -430,6 +446,63 @@ void OmpStructureChecker::CheckDistLinear(
   }
 }
 
+void OmpStructureChecker::CheckLooprangeBounds(
+    const parser::OpenMPLoopConstruct &x) {
+  const parser::OmpClauseList &clauseList{x.BeginDir().Clauses()};
+  if (clauseList.v.empty()) {
+    return;
+  }
+  for (auto &clause : clauseList.v) {
+    if (const auto *lrClause{
+            std::get_if<parser::OmpClause::Looprange>(&clause.u)}) {
+      auto first{GetIntValue(std::get<0>((lrClause->v).t))};
+      auto count{GetIntValue(std::get<1>((lrClause->v).t))};
+      if (!first || !count) {
+        return;
+      }
+      auto &loopConsList{std::get<parser::Block>(x.t)};
+      if (*first > 0 && *count > 0 &&
+          loopConsList.size() < (unsigned)(*first + *count - 1)) {
+        context_.Say(clause.source,
+            "The loop range indicated in the %s clause must not be out of the bounds of the Loop Sequence following the construct."_err_en_US,
+            parser::ToUpperCaseLetters(clause.source.ToString()));
+      }
+      return;
+    }
+  }
+}
+
+void OmpStructureChecker::CheckNestedFuse(
+    const parser::OpenMPLoopConstruct &x) {
+  auto &loopConsList{std::get<parser::Block>(x.t)};
+  assert(loopConsList.size() == 1 && "Not Expecting a loop sequence");
+  const auto *ompConstruct{parser::omp::GetOmpLoop(loopConsList.front())};
+  if (!ompConstruct) {
+    return;
+  }
+  const parser::OmpClauseList &clauseList{ompConstruct->BeginDir().Clauses()};
+  if (clauseList.v.empty()) {
+    return;
+  }
+  for (auto &clause : clauseList.v) {
+    if (const auto *lrClause{
+            std::get_if<parser::OmpClause::Looprange>(&clause.u)}) {
+      auto count{GetIntValue(std::get<1>((lrClause->v).t))};
+      if (!count) {
+        return;
+      }
+      auto &nestedLoopConsList{std::get<parser::Block>(ompConstruct->t)};
+      if (nestedLoopConsList.size() > (unsigned)(*count)) {
+        context_.Say(x.BeginDir().DirName().source,
+            "The loop sequence following the %s construct must be fully fused first."_err_en_US,
+            parser::ToUpperCaseLetters(
+                x.BeginDir().DirName().source.ToString()));
+      }
+      return;
+    }
+  }
+}
+
 void OmpStructureChecker::Leave(const parser::OpenMPLoopConstruct &x) {
   const parser::OmpClauseList &clauseList{x.BeginDir().Clauses()};
 
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index 37b4404cc598f..63751fd0c8abd 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -3401,9 +3401,11 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Sizes &c) {
 }
 
 void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) {
-  context_.Say(GetContext().clauseSource,
-      "LOOPRANGE clause is not implemented yet"_err_en_US,
-      ContextDirectiveAsFortran());
+  CheckAllowedClause(llvm::omp::Clause::OMPC_looprange);
+  auto &first = std::get<0>(x.v.t);
+  auto &count = std::get<1>(x.v.t);
+  RequiresConstantPositiveParameter(llvm::omp::Clause::OMPC_looprange, count);
+  RequiresConstantPositiveParameter(llvm::omp::Clause::OMPC_looprange, first);
 }
 
 // Restrictions specific to each clause are implemented apart from the
diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h
index 1b84bc5dda471..a4d74398378d2 100644
--- a/flang/lib/Semantics/check-omp-structure.h
+++ b/flang/lib/Semantics/check-omp-structure.h
@@ -316,6 +316,8 @@ class OmpStructureChecker : public OmpStructureCheckerBase {
   void CheckAtomicWrite(const parser::OpenMPAtomicConstruct &x);
   void CheckAtomicUpdate(const parser::OpenMPAtomicConstruct &x);
 
+  void CheckLooprangeBounds(const parser::OpenMPLoopConstruct &x);
+  void CheckNestedFuse(const parser::OpenMPLoopConstruct &x);
   void CheckDistLinear(const parser::OpenMPLoopConstruct &x);
   void CheckSIMDNest(const parser::OpenMPConstruct &x);
   void CheckTargetNest(const parser::OpenMPConstruct &x);
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index c4d103613b587..48b23ad077626 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -525,7 +525,10 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor<llvm::omp::Directive> {
   void Post(const parser::OpenMPSimpleStandaloneConstruct &) { PopContext(); }
 
   bool Pre(const parser::OpenMPLoopConstruct &);
-  void Post(const parser::OpenMPLoopConstruct &) { PopContext(); }
+  void Post(const parser::OpenMPLoopConstruct &) {
+    ordCollapseLevel++;
+    PopContext();
+  }
   void Post(const parser::OmpBeginLoopDirective &) {
     GetContext().withinConstruct = true;
   }
@@ -2028,6 +2031,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
   case llvm::omp::Directive::OMPD_teams_distribute_parallel_do_simd:
   case llvm::omp::Directive::OMPD_teams_distribute_simd:
   case llvm::omp::Directive::OMPD_teams_loop:
+  case llvm::omp::Directive::OMPD_fuse:
   case llvm::omp::Directive::OMPD_tile:
   case llvm::omp::Directive::OMPD_unroll:
     PushContext(beginName.source, beginName.v);
@@ -2205,8 +2209,11 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct(
     const parser::OpenMPLoopConstruct &x,
     llvm::SmallVector<std::int64_t> &levels,
     llvm::SmallVector<const parser::OmpClause *> &clauses) {
-  if (auto *innerConstruct{x.GetNestedConstruct()}) {
-    CollectNumAffectedLoopsFromLoopConstruct(*innerConstruct, levels, clauses);
+  for (auto &construct : std::get<parser::Block>(x.t)) {
+    if (auto *innerConstruct{parser::omp::GetOmpLoop(construct)}) {
+      CollectNumAffectedLoopsFromLoopConstruct(
+          *innerConstruct, levels, clauses);
+    }
   }
 }
 
@@ -2271,74 +2278,74 @@ void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop(
 
   // Find the associated region by skipping nested loop-associated constructs
   // such as loop transformations
-  const parser::OpenMPLoopConstruct *innermostConstruct{&x};
-  while (auto *nested{innermostConstruct->GetNestedConstruct()}) {
-    innermostConstruct = nested;
-  }
-
-  const auto *outer{innermostConstruct->GetNestedLoop()};
-  if (!outer)
-    return;
-
-  llvm::SmallVector<Symbol *> ivs;
-  int curLevel{0};
-  const parser::DoConstruct *loop{outer};
-  while (true) {
-    auto [iv, lb, ub, step] = GetLoopBounds(*loop);
-
-    if (lb)
-      checkExprHasSymbols(ivs, lb);
-    if (ub)
-      checkExprHasSymbols(ivs, ub);
-    if (step)
-      checkExprHasSymbols(ivs, step);
-    if (iv) {
-      if (auto *symbol{currScope().FindSymbol(iv->source)})
-        ivs.push_back(symbol);
-    }
+  for (auto &construct : std::get<parser::Block>(x.t)) {
+    if (const auto *innermostConstruct{parser::omp::GetOmpLoop(construct)}) {
+      CheckPerfectNestAndRectangularLoop(*innermostConstruct);
+    } else if (const auto *doConstruct{
+                   parser::omp::GetDoConstruct(construct)}) {
+
+      llvm::SmallVector<Symbol *> ivs;
+      int curLevel{0};
+      const auto *loop{doConstruct};
+      while (true) {
+        auto [iv, lb, ub, step] = GetLoopBounds(*loop);
+
+        if (lb)
+          checkExprHasSymbols(ivs, lb);
+        if (ub)
+          checkExprHasSymbols(ivs, ub);
+        if (step)
+          checkExprHasSymbols(ivs, step);
+        if (iv) {
+          if (auto *symbol{currScope().FindSymbol(iv->source)})
+            ivs.push_back(symbol);
+        }
 
-    // Stop after processing all affected loops
-    if (curLevel + 1 >= dirDepth)
-      break;
+        // Stop after processing all affected loops
+        if (curLevel + 1 >= dirDepth)
+          break;
 
-    // Recurse into nested loop
-    const auto &block{std::get<parser::Block>(loop->t)};
-    if (block.empty()) {
-      // Insufficient number of nested loops already reported by
-      // CheckAssocLoopLevel()
-      break;
-    }
+        // Recurse into nested loop
+        const auto &block{std::get<parser::Block>(loop->t)};
+        if (block.empty()) {
+          // Insufficient number of nested loops already reported by
+          // CheckAssocLoopLevel()
+          break;
+        }
 
-    loop = GetDoConstructIf(block.front());
-    if (!loop) {
-      // Insufficient number of nested loops already reported by
-      // CheckAssocLoopLevel()
-      break;
-    }
+        loop = GetDoConstructIf(block.front());
+        if (!loop) {
+          // Insufficient number of nested loops already reported by
+          // CheckAssocLoopLevel()
+          break;
+        }
 
-    auto checkPerfectNest = [&, this]() {
-      if (block.empty())
-        return;
-      auto last = block.end();
-      --last;
+        auto checkPerfectNest = [&, this]() {
+          if (block.empty())
+            return;
+          auto last = block.end();
+          --last;
 
-      // A trailing CONTINUE is not considered part of the loop body
-      if (parser::Unwrap<parser::ContinueStmt>(*last))
-        --last;
+          // A trailing CONTINUE is not considered part of the loop body
+          if (parser::Unwrap<parser::ContinueStmt>(*last))
+            --last;
 
-      // In a perfectly nested loop, the nested loop must be the only statement
-      if (last == block.begin())
-        return;
+          // In a perfectly nested loop, the nested loop must be the only
+          // statement
+          if (last == block.begin())
+            return;
 
-      // Non-perfectly nested loop
-      // TODO: Point to non-DO statement, directiveSource as a note
-      context_.Say(dirContext.directiveSource,
-          "Canonical loop nest must be perfectly nested."_err_en_US);
-    };
+          // Non-perfectly nested loop
+          // TODO: Point to non-DO statement, directiveSource as a note
+          context_.Say(dirContext.directiveSource,
+              "Canonical loop nest must be perfectly nested."_err_en_US);
+        };
 
-    checkPerfectNest();
+        checkPerfectNest();
 
-    ++curLevel;
+        ++curLevel;
+      }
+    }
   }
 }
 
@@ -2372,50 +2379,51 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
   bool hasCollapseClause{
       clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false};
 
-  const parser::OpenMPLoopConstruct *innerMostNest = &x;
-  while (auto *nested{innerMostNest->GetNestedConstruct()}) {
-    innerMostNest = nested;
-  }
-
-  if (const auto *outer{innerMostNest->GetNestedLoop()}) {
-    for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; --level) {
-      if (loop->IsDoConcurrent()) {
-        // DO CONCURRENT is explicitly allowed for the LOOP construct so long
-        // as there isn't a COLLAPSE clause
-        if (isLoopConstruct) {
-          if (hasCollapseClause) {
-            // hasCollapseClause implies clause != nullptr
-            context_.Say(clause->source,
-                "DO CONCURRENT loops cannot be used with the COLLAPSE clause."_err_en_US);
+  for (auto &construct : std::get<parser::Block>(x.t)) {
+    if (const auto *innermostConstruct{parser::omp::GetOmpLoop(construct)}) {
+      PrivatizeAssociatedLoopIndexAndCheckLoopLevel(*innermostConstruct);
+    } else if (const auto *doConstruct{
+                   parser::omp::GetDoConstruct(construct)}) {
+      for (const parser::DoConstruct *loop{&*doConstruct}; loop && level > 0;
+          --level) {
+        if (loop->IsDoConcurrent()) {
+          // DO CONCURRENT is explicitly allowed for the LOOP construct so long
+          // as there isn't a COLLAPSE clause
+          if (isLoopConstruct) {
+            if (hasCollapseClause) {
+              // hasCollapseClause implies clause != nullptr
+              context_.Say(clause->source,
+                  "DO CONCURRENT loops cannot be used with the COLLAPSE clause."_err_en_US);
+            }
+          } else {
+            auto &stmt =
+                std::get<parser::Statement<parser::NonLabelDoStmt>>(loop->t);
+            context_.Say(stmt.source,
+                "DO CONCURRENT loops cannot form part of a loop nest."_err_en_US);
           }
-        } else {
-          auto &stmt =
-              std::get<parser::Statement<parser::NonLabelDoStmt>>(loop->t);
-          context_.Say(stmt.source,
-              "DO CONCURRENT loops cannot form part of a loop nest."_err_en_US);
-        }
-      }
-      // go through all the nested do-loops and resolve index variables
-      const parser::Name *iv{GetLoopIndex(*loop)};
-      if (iv) {
-        if (auto *symbol{ResolveOmp(*iv, ivDSA, currScope())}) {
-          SetSymbolDSA(*symbol, {Symbol::Flag::OmpPreDetermined, ivDSA});
-          iv->symbol = symbol; // adjust the symbol within region
-          AddToContextObjectWithDSA(*symbol, ivDSA);
         }
+        // go through all the nested do-loops and resolve index variables
+        const parser::Name *iv{GetLoopIndex(*loop)};
+        if (iv) {
+          if (auto *symbol{ResolveOmp(*iv, ivDSA, currScope())}) {
+            SetSymbolDSA(*symbol, {Symbol::Flag::OmpPreDetermined, ivDSA});
+            iv->symbol = symbol; // adjust the symbol within region
+            AddToContextObjectWithDSA(*symbol, ivDSA);
+          }
 
-        const auto &block{std::get<parser::Block>(loop->t)};
-        const auto it{block.begin()};
-        loop = it != block.end() ? GetDoConstructIf(*it) : nullptr;
+          const auto &block{std::get<parser::Block>(loop->t)};
+          const auto it{block.begin()};
+          loop = it != block.end() ? GetDoConstructIf(*it) : nullptr;
+        }
       }
+      CheckAssocLoopLevel(level, GetAssociatedClause());
+    } else {
+      context_.Say(GetContext().directiveSource,
+          "A DO loop must follow the %s directive"_err_en_US,
+          parser::ToUpperCaseLetters(
+              llvm::omp::getOpenMPDirectiveName(GetContext().directive, version)
+                  .str()));
     }
-    CheckAssocLoopLevel(level, GetAssociatedClause());
-  } else {
-    context_.Say(GetContext().directiveSource,
-        "A DO loop must follow the %s directive"_err_en_US,
-        parser::ToUpperCaseLetters(
-            llvm::omp::getOpenMPDirectiveName(GetContext().directive, version)
-                .str()));
   }
 }
 
diff --git a/flang/lib/Semantics/rewrite-parse-tree.cpp b/flang/lib/Semantics/rewrite-parse-tree.cpp
index b5a07680a3377..285eaac1e2c8f 100644
--- a/flang/lib/Semantics/rewrite-parse-tree.cpp
+++ b/flang/lib/Semantics/rewrite-parse-tree.cpp
@@ -9,6 +9,7 @@
 #include "rewrite-parse-tree.h"
 
 #include "flang/Common/indirection.h"
+#include "flang/Parser/openmp-utils.h"
 #include "flang/Parser/parse-tree-visitor.h"
 #include "flang/Parser/parse-tree.h"
 #include "flang/Parser/tools.h"
@@ -195,18 +196,24 @@ void RewriteMutator::OpenMPSimdOnly(
             ++it;
             continue;
           }
-          if (auto *doConstruct =
-                  const_cast<parser::DoConstruct *>(ompLoop->GetNestedLoop())) {
-            auto &loopBody = std::get<parser::Block>(doConstruct->t);
-            // We can only remove some constructs from a loop when it's _not_ a
-            // OpenMP simd loop
-            OpenMPSimdOnly(const_cast<parser::Block &>(loopBody),
-                /*isNonSimdLoopBody=*/true);
-
-            auto newLoop = parser::ExecutionPartConstruct{
-                parser::ExecutableConstruct{std::move(*doConstruct)}};
+          std::list<parser::ExecutionPartConstruct> doList;
+          for (auto &construct : std::get<parser::Block>(ompLoop->t)) {
+            if (auto *doConstruct = const_cast<parser::DoConstruct *>(
+                    parser::omp::GetDoConstruct(construct))) {
+              auto &loopBody = std::get<parser::Block>(doConstruct->t);
+              // We can only remove some constructs from a loop when it's _not_
+              // an OpenMP simd loop
+              OpenMPSimdOnly(const_cast<parser::Block &>(loopBody),
+                  /*isNonSimdLoopBody=*/true);
+              auto newLoop = parser::ExecutionPartConstruct{
+                  parser::ExecutableConstruct{std::move(*doConstruct)}};
+              doList.insert(doList.end(), std::move(newLoop));
+            }
+          }
+          if (!doList.empty()) {
             it = block.erase(it);
-            block.insert(it, std::move(newLoop));
+            for (auto &newLoop : doList)
+              block.insert(it, std::move(newLoop));
             continue;
           }
         } else if (auto *ompCon{std::get_if<parser::OpenMPSectionsConstruct>(
@@ -384,10 +391,12 @@ bool RewriteMutator::Pre(parser::OpenMPLoopConstruct &ompLoop) {
     // If we're looking at a non-simd OpenMP loop, we need to explicitly
     // call OpenMPSimdOnly on the nested loop block while indicating where
     // the block comes from.
-    if (auto *doConstruct =
-            const_cast<parser::DoConstruct *>(ompLoop.GetNestedLoop())) {
-      auto &innerBlock = std::get<parser::Block>(doConstruct->t);
-      OpenMPSimdOnly(innerBlock, /*isNonSimdLoopBody=*/true);
+    for (auto &construct : std::get<parser::Block>(ompLoop.t)) {
+      if (auto *doConstruct = parser::omp::GetDoConstruct(construct)) {
+        auto &innerBlock = std::get<parser::Block>(doConstruct->t);
+        OpenMPSimdOnly(const_cast<parser::Block &>(innerBlock),
+            /*isNonSimdLoopBody=*/true);
+      }
     }
   }
   return true;
diff --git a/flang/test/Parser/OpenMP/fail-looprange.f90 b/flang/test/Parser/OpenMP/fail-looprange.f90
new file mode 100644
index 0000000000000..ebe3480b44f12
--- /dev/null
+++ b/flang/test/Parser/OpenMP/fail-looprange.f90
@@ -0,0 +1,11 @@
+! RUN: not %flang_fc1 -fsyntax-only -fopenmp %s 2>&1 | FileCheck %s
+
+! CHECK: error: expected end of line
+!$omp fuse looprange
+
+! CHECK: error: expected end of line
+!$omp fuse looprange(1)
+
+! CHECK: error: expected end of line
+!$omp fuse looprange(1,2,3)
+end
diff --git a/flang/test/Parser/OpenMP/fuse-looprange.f90 b/flang/test/Parser/OpenMP/fuse-looprange.f90
new file mode 100644
index 0000000000000..75ec15fddd65f
--- /dev/null
+++ b/flang/test/Parser/OpenMP/fuse-looprange.f90
@@ -0,0 +1,38 @@
+! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=60 %s | FileCheck --ignore-case %s
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=60  %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+subroutine openmp_fuse(x)
+
+  integer, intent(inout)::x
+
+!CHECK: !$omp fuse looprange
+!$omp  fuse looprange(1,2)
+!CHECK: do
+  do x = 1, 100
+  	call F1()
+!CHECK: end do
+  end do
+!CHECK: do
+  do x = 1, 100
+  	call F1()
+!CHECK: end do
+  end do
+!CHECK: do
+  do x = 1, 100
+  	call F1()
+!CHECK: end do
+  end do
+!CHECK: !$omp end fuse
+!$omp end fuse
+
+!PARSE-TREE: OpenMPConstruct -> OpenMPLoopConstruct
+!PARSE-TREE: OmpBeginLoopDirective
+!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = fuse
+!PARSE-TREE: OmpClauseList -> OmpClause -> Looprange -> OmpLoopRangeClause
+!PARSE-TREE: Scalar -> Integer -> Constant -> Expr = '1_4'
+!PARSE-TREE: LiteralConstant -> IntLiteralConstant = '1'
+!PARSE-TREE: Scalar -> Integer -> Constant -> Expr = '2_4'
+!PARSE-TREE: LiteralConstant -> IntLiteralConstant = '2'
+
+END subroutine openmp_fuse
+
diff --git a/flang/test/Parser/OpenMP/fuse01.f90 b/flang/test/Parser/OpenMP/fuse01.f90
new file mode 100644
index 0000000000000..98ce0e33797b5
--- /dev/null
+++ b/flang/test/Parser/OpenMP/fuse01.f90
@@ -0,0 +1,28 @@
+! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+subroutine openmp_fuse(x)
+
+  integer, intent(inout)::x
+
+!CHECK: !$omp fuse
+!$omp  fuse
+!CHECK: do
+  do x = 1, 100
+  	call F1()
+!CHECK: end do
+  end do
+!CHECK: do
+  do x = 1, 100
+  	call F1()
+!CHECK: end do
+  end do
+!CHECK: !$omp end fuse
+!$omp end fuse
+
+!PARSE-TREE: OpenMPConstruct -> OpenMPLoopConstruct
+!PARSE-TREE: OmpBeginLoopDirective
+!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = fuse
+
+END subroutine openmp_fuse
+
diff --git a/flang/test/Parser/OpenMP/fuse02.f90 b/flang/test/Parser/OpenMP/fuse02.f90
new file mode 100644
index 0000000000000..cc3de48dd658a
--- /dev/null
+++ b/flang/test/Parser/OpenMP/fuse02.f90
@@ -0,0 +1,97 @@
+! Test the Parse Tree to ensure the OpenMP Loop Transformation Construct Fuse can be constructed on another Fuse
+
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-PARSE
+! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-UNPARSE
+
+subroutine fuse_on_fuse
+  implicit none
+  integer :: I = 10
+  integer :: j
+
+  !$omp fuse
+    !$omp fuse
+      do i = 1, I
+        continue
+      end do
+      do j = 1, I
+        continue
+      end do
+    !$omp end fuse
+    do j = 1, I
+      continue
+    end do
+  !$omp end fuse
+end subroutine
+
+!CHECK-PARSE: | ExecutionPart -> Block
+!CHECK-PARSE-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+!CHECK-PARSE-NEXT: | | | OmpBeginLoopDirective
+!CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = fuse
+!CHECK-PARSE-NEXT: | | | | OmpClauseList ->
+!CHECK-PARSE-NEXT: | | | | Flags = None
+!CHECK-PARSE-NEXT: | | | Block
+!CHECK-PARSE-NEXT: | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+!CHECK-PARSE-NEXT: | | | | | OmpBeginLoopDirective
+!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = fuse
+!CHECK-PARSE-NEXT: | | | | | | OmpClauseList ->
+!CHECK-PARSE-NEXT: | | | | | | Flags = None
+!CHECK-PARSE-NEXT: | | | | | Block
+!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+!CHECK-PARSE-NEXT: | | | | | | | NonLabelDoStmt
+!CHECK-PARSE-NEXT: | | | | | | | | LoopControl -> LoopBounds
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Name = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = '1_4'
+!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1'
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> Name = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | Block
+!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt
+!CHECK-PARSE-NEXT: | | | | | | | EndDoStmt ->
+!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+!CHECK-PARSE-NEXT: | | | | | | | NonLabelDoStmt
+!CHECK-PARSE-NEXT: | | | | | | | | LoopControl -> LoopBounds
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Name = 'j'
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = '1_4'
+!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1'
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> Name = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | Block
+!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt
+!CHECK-PARSE-NEXT: | | | | | | | EndDoStmt ->
+!CHECK-PARSE-NEXT: | | | | | OmpEndLoopDirective
+!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = fuse
+!CHECK-PARSE-NEXT: | | | | | | OmpClauseList ->
+!CHECK-PARSE-NEXT: | | | | | | Flags = None
+!CHECK-PARSE-NEXT: | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+!CHECK-PARSE-NEXT: | | | | | NonLabelDoStmt
+!CHECK-PARSE-NEXT: | | | | | | LoopControl -> LoopBounds
+!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Name = 'j'
+!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Expr = '1_4'
+!CHECK-PARSE-NEXT: | | | | | | | | LiteralConstant -> IntLiteralConstant = '1'
+!CHECK-PARSE-NEXT: | | | | | | | Scalar -> Expr = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | | Designator -> DataRef -> Name = 'i'
+!CHECK-PARSE-NEXT: | | | | | Block
+!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt
+!CHECK-PARSE-NEXT: | | | | | EndDoStmt ->
+!CHECK-PARSE-NEXT: | | | OmpEndLoopDirective
+!CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = fuse
+!CHECK-PARSE-NEXT: | | | | OmpClauseList ->
+!CHECK-PARSE-NEXT: | | | | Flags = None
+
+!CHECK-UNPARSE: SUBROUTINE fuse_on_fuse
+!CHECK-UNPARSE-NEXT:  IMPLICIT NONE
+!CHECK-UNPARSE-NEXT:  INTEGER :: i = 10_4
+!CHECK-UNPARSE-NEXT:  INTEGER j
+!CHECK-UNPARSE-NEXT: !$OMP FUSE
+!CHECK-UNPARSE-NEXT: !$OMP FUSE
+!CHECK-UNPARSE-NEXT:  DO i=1_4,i
+!CHECK-UNPARSE-NEXT:    CONTINUE
+!CHECK-UNPARSE-NEXT:  END DO
+!CHECK-UNPARSE-NEXT:  DO j=1_4,i
+!CHECK-UNPARSE-NEXT:    CONTINUE
+!CHECK-UNPARSE-NEXT:  END DO
+!CHECK-UNPARSE-NEXT: !$OMP END FUSE
+!CHECK-UNPARSE-NEXT:  DO j=1_4,i
+!CHECK-UNPARSE-NEXT:    CONTINUE
+!CHECK-UNPARSE-NEXT:  END DO
+!CHECK-UNPARSE-NEXT: !$OMP END FUSE
diff --git a/flang/test/Parser/OpenMP/loop-transformation-construct04.f90 b/flang/test/Parser/OpenMP/loop-transformation-construct04.f90
new file mode 100644
index 0000000000000..e37e2bbfe155b
--- /dev/null
+++ b/flang/test/Parser/OpenMP/loop-transformation-construct04.f90
@@ -0,0 +1,80 @@
+! Test the Parse Tree to ensure the OpenMP Loop Transformation Construct Fuse constructs a correct sequence.
+
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-PARSE
+! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-UNPARSE
+
+subroutine loop_transformation_construct
+  implicit none
+  integer :: I = 10
+  integer :: j
+
+  !$omp do
+  !$omp fuse
+  do i = 1, I
+    continue
+  end do
+  do j = 1, I
+    continue
+  end do
+  !$omp end fuse
+  !$omp end do
+end subroutine
+
+!CHECK-PARSE: | ExecutionPart -> Block
+!CHECK-PARSE-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+!CHECK-PARSE-NEXT: | | | OmpBeginLoopDirective
+!CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = do
+!CHECK-PARSE-NEXT: | | | | OmpClauseList ->
+!CHECK-PARSE-NEXT: | | | | Flags = None
+!CHECK-PARSE-NEXT: | | | Block
+!CHECK-PARSE-NEXT: | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+!CHECK-PARSE-NEXT: | | | | | OmpBeginLoopDirective
+!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = fuse
+!CHECK-PARSE-NEXT: | | | | | | OmpClauseList ->
+!CHECK-PARSE-NEXT: | | | | | | Flags = None
+!CHECK-PARSE-NEXT: | | | | | Block
+!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+!CHECK-PARSE-NEXT: | | | | | | | NonLabelDoStmt
+!CHECK-PARSE-NEXT: | | | | | | | | LoopControl -> LoopBounds
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Name = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = '1_4'
+!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1'
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> Name = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | Block
+!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt
+!CHECK-PARSE-NEXT: | | | | | | | EndDoStmt ->
+!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+!CHECK-PARSE-NEXT: | | | | | | | NonLabelDoStmt
+!CHECK-PARSE-NEXT: | | | | | | | | LoopControl -> LoopBounds
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Name = 'j'
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = '1_4'
+!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1'
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> Name = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | Block
+!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt
+!CHECK-PARSE-NEXT: | | | | | | | EndDoStmt ->
+!CHECK-PARSE-NEXT: | | | | | OmpEndLoopDirective
+!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = fuse
+!CHECK-PARSE-NEXT: | | | | | | OmpClauseList ->
+!CHECK-PARSE-NEXT: | | | | | | Flags = None
+!CHECK-PARSE-NEXT: | | | OmpEndLoopDirective
+!CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = do
+!CHECK-PARSE-NEXT: | | | | OmpClauseList ->
+!CHECK-PARSE-NEXT: | | | | Flags = None
+
+!CHECK-UNPARSE: SUBROUTINE loop_transformation_construct
+!CHECK-UNPARSE-NEXT:  IMPLICIT NONE
+!CHECK-UNPARSE-NEXT:  INTEGER :: i = 10_4
+!CHECK-UNPARSE-NEXT:  INTEGER j
+!CHECK-UNPARSE-NEXT: !$OMP DO
+!CHECK-UNPARSE-NEXT: !$OMP FUSE
+!CHECK-UNPARSE-NEXT:  DO i=1_4,i
+!CHECK-UNPARSE-NEXT:    CONTINUE
+!CHECK-UNPARSE-NEXT:  END DO
+!CHECK-UNPARSE-NEXT:  DO j=1_4,i
+!CHECK-UNPARSE-NEXT:    CONTINUE
+!CHECK-UNPARSE-NEXT:  END DO
+!CHECK-UNPARSE-NEXT: !$OMP END FUSE
+!CHECK-UNPARSE-NEXT: !$OMP END DO
diff --git a/flang/test/Parser/OpenMP/loop-transformation-construct05.f90 b/flang/test/Parser/OpenMP/loop-transformation-construct05.f90
new file mode 100644
index 0000000000000..6d3303841d506
--- /dev/null
+++ b/flang/test/Parser/OpenMP/loop-transformation-construct05.f90
@@ -0,0 +1,90 @@
+! Test the Parse Tree to ensure the OpenMP Loop Transformation Construct Fuse constructs a correct sequence
+! and can correctly combine with loop nests
+
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-PARSE
+! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck %s --check-prefix=CHECK-UNPARSE
+
+subroutine loop_transformation_construct
+  implicit none
+  integer :: I = 10
+  integer :: j
+
+  !$omp do
+  !$omp fuse
+  do i = 1, I
+    continue
+  end do
+  !$omp tile sizes(2)
+    do j = 1, I
+      continue
+    end do
+  !$omp end fuse
+  !$omp end do
+end subroutine
+
+!CHECK-PARSE: | ExecutionPart -> Block
+!CHECK-PARSE-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+!CHECK-PARSE-NEXT: | | | OmpBeginLoopDirective
+!CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = do
+!CHECK-PARSE-NEXT: | | | | OmpClauseList ->
+!CHECK-PARSE-NEXT: | | | | Flags = None
+!CHECK-PARSE-NEXT: | | | Block
+!CHECK-PARSE-NEXT: | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+!CHECK-PARSE-NEXT: | | | | | OmpBeginLoopDirective
+!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = fuse
+!CHECK-PARSE-NEXT: | | | | | | OmpClauseList ->
+!CHECK-PARSE-NEXT: | | | | | | Flags = None
+!CHECK-PARSE-NEXT: | | | | | Block
+!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+!CHECK-PARSE-NEXT: | | | | | | | NonLabelDoStmt
+!CHECK-PARSE-NEXT: | | | | | | | | LoopControl -> LoopBounds
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Name = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = '1_4'
+!CHECK-PARSE-NEXT: | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1'
+!CHECK-PARSE-NEXT: | | | | | | | | | Scalar -> Expr = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | | | | Designator -> DataRef -> Name = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | Block
+!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt
+!CHECK-PARSE-NEXT: | | | | | | | EndDoStmt ->
+!CHECK-PARSE-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
+!CHECK-PARSE-NEXT: | | | | | | | OmpBeginLoopDirective
+!CHECK-PARSE-NEXT: | | | | | | | | OmpDirectiveName -> llvm::omp::Directive = tile
+!CHECK-PARSE-NEXT: | | | | | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4'
+!CHECK-PARSE-NEXT: | | | | | | | | | LiteralConstant -> IntLiteralConstant = '2'
+!CHECK-PARSE-NEXT: | | | | | | | | Flags = None
+!CHECK-PARSE-NEXT: | | | | | | | Block
+!CHECK-PARSE-NEXT: | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+!CHECK-PARSE-NEXT: | | | | | | | | | NonLabelDoStmt
+!CHECK-PARSE-NEXT: | | | | | | | | | | LoopControl -> LoopBounds
+!CHECK-PARSE-NEXT: | | | | | | | | | | | Scalar -> Name = 'j'
+!CHECK-PARSE-NEXT: | | | | | | | | | | | Scalar -> Expr = '1_4'
+!CHECK-PARSE-NEXT: | | | | | | | | | | | | LiteralConstant -> IntLiteralConstant = '1'
+!CHECK-PARSE-NEXT: | | | | | | | | | | | Scalar -> Expr = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | | | | | | Designator -> DataRef -> Name = 'i'
+!CHECK-PARSE-NEXT: | | | | | | | | | Block
+!CHECK-PARSE-NEXT: | | | | | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> ContinueStmt
+!CHECK-PARSE-NEXT: | | | | | | | | | EndDoStmt ->
+!CHECK-PARSE-NEXT: | | | | | OmpEndLoopDirective
+!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = fuse
+!CHECK-PARSE-NEXT: | | | | | | OmpClauseList ->
+!CHECK-PARSE-NEXT: | | | | | | Flags = None
+!CHECK-PARSE-NEXT: | | | OmpEndLoopDirective
+!CHECK-PARSE-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = do
+!CHECK-PARSE-NEXT: | | | | OmpClauseList ->
+!CHECK-PARSE-NEXT: | | | | Flags = None
+
+!CHECK-UNPARSE: SUBROUTINE loop_transformation_construct
+!CHECK-UNPARSE-NEXT:  IMPLICIT NONE
+!CHECK-UNPARSE-NEXT:  INTEGER :: i = 10_4
+!CHECK-UNPARSE-NEXT:  INTEGER j
+!CHECK-UNPARSE-NEXT: !$OMP DO
+!CHECK-UNPARSE-NEXT: !$OMP FUSE
+!CHECK-UNPARSE-NEXT:  DO i=1_4,i
+!CHECK-UNPARSE-NEXT:    CONTINUE
+!CHECK-UNPARSE-NEXT:  END DO
+!CHECK-UNPARSE-NEXT:  !$OMP TILE
+!CHECK-UNPARSE-NEXT:  DO j=1_4,i
+!CHECK-UNPARSE-NEXT:    CONTINUE
+!CHECK-UNPARSE-NEXT:  END DO
+!CHECK-UNPARSE-NEXT: !$OMP END FUSE
+!CHECK-UNPARSE-NEXT: !$OMP END DO
diff --git a/flang/test/Semantics/OpenMP/loop-transformation-clauses01.f90 b/flang/test/Semantics/OpenMP/loop-transformation-clauses01.f90
new file mode 100644
index 0000000000000..9ca0e8cfc9af1
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/loop-transformation-clauses01.f90
@@ -0,0 +1,66 @@
+! Testing the Semantics of clauses on loop transformation directives
+
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60
+
+
+subroutine loop_transformation_construct1
+  implicit none
+  integer, parameter:: i = 5
+  integer :: x
+  integer :: a
+  integer :: v(i)
+
+  !ERROR: At most one LOOPRANGE clause can appear on the FUSE directive
+  !$omp fuse looprange(1,2) looprange(1,2)
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  !$omp end fuse
+
+  !ERROR: The loop range indicated in the LOOPRANGE(5,2) clause must not be out of the bounds of the Loop Sequence following the construct.
+  !$omp fuse looprange(5,2)
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  !$omp end fuse
+
+  !ERROR: The parameter of the LOOPRANGE clause must be a constant positive integer expression
+  !$omp fuse looprange(0,1)
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  !$omp end fuse
+
+  !ERROR: The parameter of the LOOPRANGE clause must be a constant positive integer expression
+  !$omp fuse looprange(1,-1)
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  !$omp end fuse
+
+  !ERROR: Must be a constant value
+  !$omp fuse looprange(a,2)
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  !$omp end fuse
+
+  !ERROR: Must be a constant value
+  !$omp fuse looprange(1,a)
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  !$omp end fuse
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/loop-transformation-construct01.f90 b/flang/test/Semantics/OpenMP/loop-transformation-construct01.f90
index f718efc32aabf..927831a06d5fa 100644
--- a/flang/test/Semantics/OpenMP/loop-transformation-construct01.f90
+++ b/flang/test/Semantics/OpenMP/loop-transformation-construct01.f90
@@ -62,7 +62,7 @@ subroutine loop_transformation_construct4
   integer :: v(i)
 
   !$omp do
-  !ERROR: If a loop construct has been fully unrolled, it cannot then be tiled
+  !ERROR: If a loop construct has been fully unrolled, it cannot then be further transformed
   !$omp tile
   !$omp unroll full
   do x = 1, i
@@ -77,7 +77,7 @@ subroutine loop_transformation_construct5
   integer :: v(i)
 
   !$omp do
-  !ERROR: If a loop construct has been fully unrolled, it cannot then be tiled
+  !ERROR: If a loop construct has been fully unrolled, it cannot then be further transformed
   !$omp tile
   !$omp unroll
   do x = 1, i
diff --git a/flang/test/Semantics/OpenMP/loop-transformation-construct02.f90 b/flang/test/Semantics/OpenMP/loop-transformation-construct02.f90
new file mode 100644
index 0000000000000..d82fc3668198d
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/loop-transformation-construct02.f90
@@ -0,0 +1,93 @@
+! Testing the Semantics of loop sequences combined with 
+! nested Loop Transformation Constructs
+
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60
+
+subroutine loop_transformation_construct1
+  implicit none
+
+  !$omp do
+  !ERROR: The FUSE construct requires the END FUSE directive
+  !$omp fuse 
+end subroutine
+
+subroutine loop_transformation_construct2
+  implicit none
+
+  !$omp do
+  !ERROR: A DO loop must follow the FUSE directive
+  !$omp fuse 
+  !$omp end fuse
+end subroutine
+
+subroutine loop_transformation_construct3
+  implicit none
+  integer :: i = 5
+  integer :: y
+  integer :: v(i)
+
+  !$omp do
+  !$omp fuse
+  do x = 1, i
+    v(x) = x(x) * 2
+  end do
+  do x = 1, i
+    v(x) = x(x) * 2
+  end do
+  !$omp end fuse
+  !$omp end do
+  !ERROR: The END FUSE directive must follow the DO loop associated with the loop construct
+  !$omp end fuse
+end subroutine
+
+subroutine loop_transformation_construct4
+  implicit none
+  integer :: i = 5
+  integer :: y
+  integer :: v(i)
+
+  !$omp do
+  do x = 1, i
+    v(x) = x(x) * 2
+  end do
+  !ERROR: A DO loop must follow the FUSE directive
+  !$omp fuse
+  !$omp end fuse
+end subroutine
+
+subroutine loop_transformation_construct5
+  implicit none
+  integer :: i = 5
+  integer :: y
+  integer :: v(i)
+
+  !$omp do
+  !ERROR: If a loop construct has been fully unrolled, it cannot then be further transformed
+  !$omp fuse
+  !$omp unroll full
+  do x = 1, i
+    v(x) = x(x) * 2
+  end do
+  do x = 1, i
+    v(x) = x(x) * 2
+  end do
+  !$omp end fuse
+end subroutine
+
+subroutine loop_transformation_construct6
+  implicit none
+  integer :: i = 5
+  integer :: y
+  integer :: v(i)
+
+  !$omp do
+  !$omp fuse looprange(1,1)
+  !$omp unroll partial(2)
+  do x = 1, i
+    v(x) = x(x) * 2
+  end do
+  do x = 1, i
+    v(x) = x(x) * 2
+  end do
+  !$omp end fuse 
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/loop-transformation-construct03.f90 b/flang/test/Semantics/OpenMP/loop-transformation-construct03.f90
new file mode 100644
index 0000000000000..5e459c7985523
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/loop-transformation-construct03.f90
@@ -0,0 +1,39 @@
+! Testing the Semantic failure of forming loop sequences under regular OpenMP directives 
+
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60
+
+subroutine loop_transformation_construct1
+  implicit none
+  integer :: i = 5
+  integer :: y
+  integer :: v(i)
+
+  ! Only 1 do loop is associated with the OMP DO directive so the END DO directive is unmatched
+  !$omp do
+  do x = 1, i
+    v(x) = x(x) * 2
+  end do
+  do x = 1, i
+    v(x) = x(x) * 2
+  end do
+  !ERROR: The END DO directive must follow the DO loop associated with the loop construct
+  !$omp end do
+end subroutine
+
+subroutine loop_transformation_construct2
+  implicit none
+  integer :: i = 5
+  integer :: y
+  integer :: v(i)
+
+  ! Only 1 do loop is associated with the OMP TILE directive so the END TILE directive is unmatched
+  !$omp tile sizes(2)
+  do x = 1, i
+    v(x) = x(x) * 2
+  end do
+  do x = 1, i
+    v(x) = x(x) * 2
+  end do
+  !ERROR: The END TILE directive must follow the DO loop associated with the loop construct
+  !$omp end tile
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/loop-transformation-construct04.f90 b/flang/test/Semantics/OpenMP/loop-transformation-construct04.f90
new file mode 100644
index 0000000000000..2856247329f3b
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/loop-transformation-construct04.f90
@@ -0,0 +1,47 @@
+! Testing the Semantic failure of applying loop-associated directives to a loop sequence that is only partially fused
+
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60
+
+subroutine loop_transformation_construct3
+  implicit none
+  integer, parameter :: i = 5
+  integer :: x
+  integer :: v(i)
+
+  !ERROR: The loop sequence following the DO construct must be fully fused first.
+  !$omp do
+  !$omp fuse looprange(1,2)
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  !$omp end fuse
+  !$omp end do
+end subroutine
+
+subroutine loop_transformation_construct4
+  implicit none
+  integer, parameter :: i = 5
+  integer :: x
+  integer :: v(i)
+
+  !ERROR: The loop sequence following the TILE construct must be fully fused first.
+  !$omp tile sizes(2)
+  !$omp fuse looprange(1,2)
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  do x = 1, i
+    v(x) = x * 2
+  end do
+  !$omp end fuse
+  !$omp end tile
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/tile02.f90 b/flang/test/Semantics/OpenMP/tile02.f90
index 676796375353f..096a0f349932e 100644
--- a/flang/test/Semantics/OpenMP/tile02.f90
+++ b/flang/test/Semantics/OpenMP/tile02.f90
@@ -6,7 +6,7 @@ subroutine on_unroll
   implicit none
   integer i
 
-  !ERROR: If a loop construct has been fully unrolled, it cannot then be tiled
+  !ERROR: If a loop construct has been fully unrolled, it cannot then be further transformed
   !$omp tile sizes(2)
   !$omp unroll
   do i = 1, 5

>From be7423ab33d227e71dcd41ad59efce64bfd2dccc Mon Sep 17 00:00:00 2001
From: Ferran Toda <ferran.todacasaban at bsc.es>
Date: Thu, 20 Nov 2025 02:45:10 +0000
Subject: [PATCH 2/2] lower loop fuse

---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    |   1 +
 flang/lib/Lower/OpenMP/Clauses.cpp            |   5 +-
 .../lib/Lower/OpenMP/DataSharingProcessor.cpp |   3 +-
 flang/lib/Lower/OpenMP/OpenMP.cpp             |  77 ++++++++--
 flang/lib/Lower/OpenMP/Utils.cpp              |  28 ++--
 flang/lib/Lower/OpenMP/Utils.h                |   6 +-
 flang/test/Lower/OpenMP/fuse01.f90            |  93 ++++++++++++
 flang/test/Lower/OpenMP/fuse02.f90            | 123 +++++++++++++++
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  53 +++++++
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 111 ++++++++++++++
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  34 +++++
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |  68 +++++++++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  54 +++++++
 mlir/test/Dialect/OpenMP/cli-fuse.mlir        | 114 ++++++++++++++
 mlir/test/Dialect/OpenMP/invalid-fuse.mlir    | 100 +++++++++++++
 .../test/Target/LLVMIR/openmp-cli-fuse01.mlir | 100 +++++++++++++
 .../test/Target/LLVMIR/openmp-cli-fuse02.mlir | 140 ++++++++++++++++++
 .../test/transform/fuse/do-looprange.f90      |  60 ++++++++
 openmp/runtime/test/transform/fuse/do.f90     |  52 +++++++
 19 files changed, 1194 insertions(+), 28 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/fuse01.f90
 create mode 100644 flang/test/Lower/OpenMP/fuse02.f90
 create mode 100644 mlir/test/Dialect/OpenMP/cli-fuse.mlir
 create mode 100644 mlir/test/Dialect/OpenMP/invalid-fuse.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-cli-fuse01.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-cli-fuse02.mlir
 create mode 100644 openmp/runtime/test/transform/fuse/do-looprange.f90
 create mode 100644 openmp/runtime/test/transform/fuse/do.f90

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 4a392381287d5..ab3a174c7ad69 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -279,6 +279,7 @@ bool ClauseProcessor::processCollapse(
     llvm::SmallVectorImpl<const semantics::Symbol *> &iv) const {
 
   int64_t numCollapse = collectLoopRelatedInfo(converter, currentLocation, eval,
+                                               eval.getFirstNestedEvaluation(),
                                                clauses, loopResult, iv);
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   collapseResult.collapseNumLoops = firOpBuilder.getI64IntegerAttr(numCollapse);
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index b1a3c3d3c5439..f2defc62dce91 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -1063,7 +1063,10 @@ Link make(const parser::OmpClause::Link &inp,
 
 LoopRange make(const parser::OmpClause::Looprange &inp,
                semantics::SemanticsContext &semaCtx) {
-  llvm_unreachable("Unimplemented: looprange");
+  auto &t0 = std::get<0>(inp.v.t);
+  auto &t1 = std::get<1>(inp.v.t);
+  return LoopRange{{/*First*/ makeExpr(t0, semaCtx),
+                    /*Count*/ makeExpr(t1, semaCtx)}};
 }
 
 Map make(const parser::OmpClause::Map &inp,
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 83c2eda0a2dc7..da9480123513f 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -347,7 +347,8 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) {
     mlir::omp::LoopRelatedClauseOps result;
     llvm::SmallVector<const semantics::Symbol *> iv;
     collectLoopRelatedInfo(converter, converter.getCurrentLocation(), eval,
-                           clauses, result, iv);
+                           eval.getFirstNestedEvaluation(), clauses, result,
+                           iv);
 
     // Update the original variable just before exiting the worksharing
     // loop. Conversion as follows:
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index b6efa8592c678..2d981f421a4ae 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1982,9 +1982,9 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
 static void genCanonicalLoopNest(
     lower::AbstractConverter &converter, lower::SymMap &symTable,
     semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
-    mlir::Location loc, const ConstructQueue &queue,
-    ConstructQueue::const_iterator item, size_t numLoops,
-    llvm::SmallVectorImpl<mlir::omp::CanonicalLoopOp> &loops) {
+    lower::pft::Evaluation &nestedEval, mlir::Location loc,
+    const ConstructQueue &queue, ConstructQueue::const_iterator item,
+    size_t numLoops, llvm::SmallVectorImpl<mlir::omp::CanonicalLoopOp> &loops) {
   assert(loops.empty() && "Expecting empty list to fill");
   assert(numLoops >= 1 && "Expecting at least one loop");
 
@@ -1992,7 +1992,8 @@ static void genCanonicalLoopNest(
 
   mlir::omp::LoopRelatedClauseOps loopInfo;
   llvm::SmallVector<const semantics::Symbol *, 3> ivs;
-  collectLoopRelatedInfo(converter, loc, eval, numLoops, loopInfo, ivs);
+  collectLoopRelatedInfo(converter, loc, eval, nestedEval, numLoops, loopInfo,
+                         ivs);
   assert(ivs.size() == numLoops &&
          "Expected to parse as many loop variables as there are loops");
 
@@ -2014,7 +2015,7 @@ static void genCanonicalLoopNest(
 
   // Step 1: Loop prologues
   // Computing the trip count must happen before entering the outermost loop
-  lower::pft::Evaluation *innermostEval = &eval.getFirstNestedEvaluation();
+  lower::pft::Evaluation *innermostEval = &nestedEval;
   for ([[maybe_unused]] auto iv : ivs) {
     if (innermostEval->getIf<parser::DoConstruct>()->IsDoConcurrent()) {
       // OpenMP specifies DO CONCURRENT only with the `!omp loop` construct.
@@ -2186,7 +2187,8 @@ static void genTileOp(Fortran::lower::AbstractConverter &converter,
   llvm::SmallVector<mlir::omp::CanonicalLoopOp, 3> canonLoops;
   canonLoops.reserve(numLoops);
 
-  genCanonicalLoopNest(converter, symTable, semaCtx, eval, loc, queue, item,
+  genCanonicalLoopNest(converter, symTable, semaCtx, eval,
+                       eval.getFirstNestedEvaluation(), loc, queue, item,
                        numLoops, canonLoops);
   assert((canonLoops.size() == numLoops) &&
          "Expecting the predetermined number of loops");
@@ -2217,6 +2219,58 @@ static void genTileOp(Fortran::lower::AbstractConverter &converter,
                             sizesClause.sizes);
 }
 
+static void genFuseOp(Fortran::lower::AbstractConverter &converter,
+                      Fortran::lower::SymMap &symTable,
+                      lower::StatementContext &stmtCtx,
+                      Fortran::semantics::SemanticsContext &semaCtx,
+                      Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+                      const ConstructQueue &queue,
+                      ConstructQueue::const_iterator item) {
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+
+  int32_t first = 0;
+  int32_t count = 0;
+  auto iter = llvm::find_if(item->clauses, [](const Clause &clause) {
+    return clause.id == llvm::omp::Clause::OMPC_looprange;
+  });
+  if (iter != item->clauses.end()) {
+    const auto &looprange = std::get<clause::LoopRange>(iter->u);
+    first = evaluate::ToInt64(std::get<0>(looprange.t)).value();
+    count = evaluate::ToInt64(std::get<1>(looprange.t)).value();
+  }
+
+  llvm::SmallVector<mlir::Value> applyees;
+  for (auto &child : eval.getNestedEvaluations()) {
+    // Skip OmpEndLoopDirective
+    if (&child == &eval.getLastNestedEvaluation())
+      break;
+
+    // Emit the associated loop
+    llvm::SmallVector<mlir::omp::CanonicalLoopOp> canonLoops;
+    genCanonicalLoopNest(converter, symTable, semaCtx, eval, child, loc, queue,
+                         item, 1, canonLoops);
+
+    auto cli = llvm::getSingleElement(canonLoops).getCli();
+    applyees.push_back(cli);
+  }
+  // One generated loop + one for each loop not inside the specified looprange
+  // if present
+  llvm::SmallVector<mlir::Value> generatees;
+  int64_t numGeneratees = count == 0 ? 1 : applyees.size() - count + 1;
+  for (int i = 0; i < numGeneratees; i++) {
+    auto fusedCLI = mlir::omp::NewCliOp::create(firOpBuilder, loc);
+    generatees.push_back(fusedCLI);
+  }
+  auto op = mlir::omp::FuseOp::create(firOpBuilder, loc, generatees, applyees);
+
+  if (count != 0) {
+    mlir::IntegerAttr firstAttr = firOpBuilder.getI32IntegerAttr(first);
+    mlir::IntegerAttr countAttr = firOpBuilder.getI32IntegerAttr(count);
+    op->setAttr("first", firstAttr);
+    op->setAttr("count", countAttr);
+  }
+}
+
 static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
                         Fortran::lower::SymMap &symTable,
                         lower::StatementContext &stmtCtx,
@@ -2233,7 +2287,8 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
 
   // Emit the associated loop
   llvm::SmallVector<mlir::omp::CanonicalLoopOp, 1> canonLoops;
-  genCanonicalLoopNest(converter, symTable, semaCtx, eval, loc, queue, item, 1,
+  genCanonicalLoopNest(converter, symTable, semaCtx, eval,
+                       eval.getFirstNestedEvaluation(), loc, queue, item, 1,
                        canonLoops);
 
   llvm::SmallVector<mlir::Value, 1> applyees;
@@ -3507,13 +3562,9 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
   case llvm::omp::Directive::OMPD_tile:
     genTileOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
     break;
-  case llvm::omp::Directive::OMPD_fuse: {
-    unsigned version = semaCtx.langOptions().OpenMPVersion;
-    if (!semaCtx.langOptions().OpenMPSimd)
-      TODO(loc, "Unhandled loop directive (" +
-                    llvm::omp::getOpenMPDirectiveName(dir, version) + ")");
+  case llvm::omp::Directive::OMPD_fuse:
+    genFuseOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
     break;
-  }
   case llvm::omp::Directive::OMPD_unroll:
     genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
     break;
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 7d7a4869ab3a6..913e4d1e69500 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -812,13 +812,14 @@ void collectTileSizesFromOpenMPConstruct(
 
 int64_t collectLoopRelatedInfo(
     lower::AbstractConverter &converter, mlir::Location currentLocation,
-    lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses,
+    lower::pft::Evaluation &eval, lower::pft::Evaluation &nestedEval,
+    const omp::List<omp::Clause> &clauses,
     mlir::omp::LoopRelatedClauseOps &result,
     llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
   int64_t numCollapse = 1;
 
   // Collect the loops to collapse.
-  lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation();
+  lower::pft::Evaluation *doConstructEval = &nestedEval;
   if (doConstructEval->getIf<parser::DoConstruct>()->IsDoConcurrent()) {
     TODO(currentLocation, "Do Concurrent in Worksharing loop construct");
   }
@@ -830,21 +831,21 @@ int64_t collectLoopRelatedInfo(
     numCollapse = collapseValue;
   }
 
-  collectLoopRelatedInfo(converter, currentLocation, eval, numCollapse, result,
-                         iv);
+  collectLoopRelatedInfo(converter, currentLocation, eval, nestedEval,
+                         numCollapse, result, iv);
   return numCollapse;
 }
 
 void collectLoopRelatedInfo(
     lower::AbstractConverter &converter, mlir::Location currentLocation,
-    lower::pft::Evaluation &eval, int64_t numCollapse,
-    mlir::omp::LoopRelatedClauseOps &result,
+    lower::pft::Evaluation &eval, lower::pft::Evaluation &nestedEval,
+    int64_t numCollapse, mlir::omp::LoopRelatedClauseOps &result,
     llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
 
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
   // Collect the loops to collapse.
-  lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation();
+  lower::pft::Evaluation *doConstructEval = &nestedEval;
   if (doConstructEval->getIf<parser::DoConstruct>()->IsDoConcurrent()) {
     TODO(currentLocation, "Do Concurrent in Worksharing loop construct");
   }
@@ -852,10 +853,15 @@ void collectLoopRelatedInfo(
   // Collect sizes from tile directive if present.
   std::int64_t sizesLengthValue = 0l;
   if (auto *ompCons{eval.getIf<parser::OpenMPConstruct>()}) {
-    processTileSizesFromOpenMPConstruct(
-        ompCons, [&](const parser::OmpClause::Sizes *tclause) {
-          sizesLengthValue = tclause->v.size();
-        });
+    if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(&ompCons->u)}) {
+      const parser::OmpDirectiveSpecification &beginSpec{ompLoop->BeginDir()};
+      if (beginSpec.DirId() == llvm::omp::Directive::OMPD_tile) {
+        processTileSizesFromOpenMPConstruct(
+            ompCons, [&](const parser::OmpClause::Sizes *tclause) {
+              sizesLengthValue = tclause->v.size();
+            });
+      }
+    }
   }
 
   std::int64_t collapseValue = std::max(numCollapse, sizesLengthValue);
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 2960b663b08b2..886a5c1835f7e 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -169,13 +169,15 @@ void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp,
 
 int64_t collectLoopRelatedInfo(
     lower::AbstractConverter &converter, mlir::Location currentLocation,
-    lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses,
+    lower::pft::Evaluation &eval, lower::pft::Evaluation &nestedEval,
+    const omp::List<omp::Clause> &clauses,
     mlir::omp::LoopRelatedClauseOps &result,
     llvm::SmallVectorImpl<const semantics::Symbol *> &iv);
 
 void collectLoopRelatedInfo(
     lower::AbstractConverter &converter, mlir::Location currentLocation,
-    lower::pft::Evaluation &eval, std::int64_t collapseValue,
+    lower::pft::Evaluation &eval, lower::pft::Evaluation &nestedEval,
+    std::int64_t collapseValue,
     // const omp::List<omp::Clause> &clauses,
     mlir::omp::LoopRelatedClauseOps &result,
     llvm::SmallVectorImpl<const semantics::Symbol *> &iv);
diff --git a/flang/test/Lower/OpenMP/fuse01.f90 b/flang/test/Lower/OpenMP/fuse01.f90
new file mode 100644
index 0000000000000..1377bf3e9c529
--- /dev/null
+++ b/flang/test/Lower/OpenMP/fuse01.f90
@@ -0,0 +1,93 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s | FileCheck %s
+
+
+subroutine omp_fuse01(lb1, ub1, inc1, lb2, ub2, inc2)
+  integer res, i, j
+  integer lb1, ub1, inc1
+  integer lb2, ub2, inc2
+
+  !$omp fuse
+  do i = lb1, ub1, inc1
+    res = i
+  end do
+  do j = lb2, ub2, inc2
+    res = j
+  end do
+  !$omp end fuse
+
+end subroutine omp_fuse01
+
+
+! CHECK-LABEL:   func.func @_QPomp_fuse01(
+! CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "lb1"},
+! CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "ub1"},
+! CHECK-SAME:      %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "inc1"},
+! CHECK-SAME:      %[[ARG3:.*]]: !fir.ref<i32> {fir.bindc_name = "lb2"},
+! CHECK-SAME:      %[[ARG4:.*]]: !fir.ref<i32> {fir.bindc_name = "ub2"},
+! CHECK-SAME:      %[[ARG5:.*]]: !fir.ref<i32> {fir.bindc_name = "inc2"}) {
+! CHECK:           %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[ALLOCA_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_fuse01Ei"}
+! CHECK:           %[[DECLARE_0:.*]]:2 = hlfir.declare %[[ALLOCA_0]] {uniq_name = "_QFomp_fuse01Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[DECLARE_1:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Einc1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[DECLARE_2:.*]]:2 = hlfir.declare %[[ARG5]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Einc2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[ALLOCA_1:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFomp_fuse01Ej"}
+! CHECK:           %[[DECLARE_3:.*]]:2 = hlfir.declare %[[ALLOCA_1]] {uniq_name = "_QFomp_fuse01Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[DECLARE_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Elb1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[DECLARE_5:.*]]:2 = hlfir.declare %[[ARG3]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Elb2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[ALLOCA_2:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_fuse01Eres"}
+! CHECK:           %[[DECLARE_6:.*]]:2 = hlfir.declare %[[ALLOCA_2]] {uniq_name = "_QFomp_fuse01Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[DECLARE_7:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Eub1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[DECLARE_8:.*]]:2 = hlfir.declare %[[ARG4]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse01Eub2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[LOAD_0:.*]] = fir.load %[[DECLARE_4]]#0 : !fir.ref<i32>
+! CHECK:           %[[LOAD_1:.*]] = fir.load %[[DECLARE_7]]#0 : !fir.ref<i32>
+! CHECK:           %[[LOAD_2:.*]] = fir.load %[[DECLARE_1]]#0 : !fir.ref<i32>
+! CHECK:           %[[CONSTANT_0:.*]] = arith.constant 0 : i32
+! CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1 : i32
+! CHECK:           %[[CMPI_0:.*]] = arith.cmpi slt, %[[LOAD_2]], %[[CONSTANT_0]] : i32
+! CHECK:           %[[SUBI_0:.*]] = arith.subi %[[CONSTANT_0]], %[[LOAD_2]] : i32
+! CHECK:           %[[SELECT_0:.*]] = arith.select %[[CMPI_0]], %[[SUBI_0]], %[[LOAD_2]] : i32
+! CHECK:           %[[SELECT_1:.*]] = arith.select %[[CMPI_0]], %[[LOAD_1]], %[[LOAD_0]] : i32
+! CHECK:           %[[SELECT_2:.*]] = arith.select %[[CMPI_0]], %[[LOAD_0]], %[[LOAD_1]] : i32
+! CHECK:           %[[SUBI_1:.*]] = arith.subi %[[SELECT_2]], %[[SELECT_1]] overflow<nuw> : i32
+! CHECK:           %[[DIVUI_0:.*]] = arith.divui %[[SUBI_1]], %[[SELECT_0]] : i32
+! CHECK:           %[[ADDI_0:.*]] = arith.addi %[[DIVUI_0]], %[[CONSTANT_1]] overflow<nuw> : i32
+! CHECK:           %[[CMPI_1:.*]] = arith.cmpi slt, %[[SELECT_2]], %[[SELECT_1]] : i32
+! CHECK:           %[[SELECT_3:.*]] = arith.select %[[CMPI_1]], %[[CONSTANT_0]], %[[ADDI_0]] : i32
+! CHECK:           %[[NEW_CLI_0:.*]] = omp.new_cli
+! CHECK:           omp.canonical_loop(%[[NEW_CLI_0]]) %[[VAL_0:.*]] : i32 in range(%[[SELECT_3]]) {
+! CHECK:             %[[MULI_0:.*]] = arith.muli %[[VAL_0]], %[[LOAD_2]] : i32
+! CHECK:             %[[ADDI_1:.*]] = arith.addi %[[LOAD_0]], %[[MULI_0]] : i32
+! CHECK:             hlfir.assign %[[ADDI_1]] to %[[DECLARE_0]]#0 : i32, !fir.ref<i32>
+! CHECK:             %[[LOAD_3:.*]] = fir.load %[[DECLARE_0]]#0 : !fir.ref<i32>
+! CHECK:             hlfir.assign %[[LOAD_3]] to %[[DECLARE_6]]#0 : i32, !fir.ref<i32>
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           %[[LOAD_4:.*]] = fir.load %[[DECLARE_5]]#0 : !fir.ref<i32>
+! CHECK:           %[[LOAD_5:.*]] = fir.load %[[DECLARE_8]]#0 : !fir.ref<i32>
+! CHECK:           %[[LOAD_6:.*]] = fir.load %[[DECLARE_2]]#0 : !fir.ref<i32>
+! CHECK:           %[[CONSTANT_2:.*]] = arith.constant 0 : i32
+! CHECK:           %[[CONSTANT_3:.*]] = arith.constant 1 : i32
+! CHECK:           %[[CMPI_2:.*]] = arith.cmpi slt, %[[LOAD_6]], %[[CONSTANT_2]] : i32
+! CHECK:           %[[SUBI_2:.*]] = arith.subi %[[CONSTANT_2]], %[[LOAD_6]] : i32
+! CHECK:           %[[SELECT_4:.*]] = arith.select %[[CMPI_2]], %[[SUBI_2]], %[[LOAD_6]] : i32
+! CHECK:           %[[SELECT_5:.*]] = arith.select %[[CMPI_2]], %[[LOAD_5]], %[[LOAD_4]] : i32
+! CHECK:           %[[SELECT_6:.*]] = arith.select %[[CMPI_2]], %[[LOAD_4]], %[[LOAD_5]] : i32
+! CHECK:           %[[SUBI_3:.*]] = arith.subi %[[SELECT_6]], %[[SELECT_5]] overflow<nuw> : i32
+! CHECK:           %[[DIVUI_1:.*]] = arith.divui %[[SUBI_3]], %[[SELECT_4]] : i32
+! CHECK:           %[[ADDI_2:.*]] = arith.addi %[[DIVUI_1]], %[[CONSTANT_3]] overflow<nuw> : i32
+! CHECK:           %[[CMPI_3:.*]] = arith.cmpi slt, %[[SELECT_6]], %[[SELECT_5]] : i32
+! CHECK:           %[[SELECT_7:.*]] = arith.select %[[CMPI_3]], %[[CONSTANT_2]], %[[ADDI_2]] : i32
+! CHECK:           %[[NEW_CLI_1:.*]] = omp.new_cli
+! CHECK:           omp.canonical_loop(%[[NEW_CLI_1]]) %[[VAL_1:.*]] : i32 in range(%[[SELECT_7]]) {
+! CHECK:             %[[MULI_1:.*]] = arith.muli %[[VAL_1]], %[[LOAD_6]] : i32
+! CHECK:             %[[ADDI_3:.*]] = arith.addi %[[LOAD_4]], %[[MULI_1]] : i32
+! CHECK:             hlfir.assign %[[ADDI_3]] to %[[DECLARE_3]]#0 : i32, !fir.ref<i32>
+! CHECK:             %[[LOAD_7:.*]] = fir.load %[[DECLARE_3]]#0 : !fir.ref<i32>
+! CHECK:             hlfir.assign %[[LOAD_7]] to %[[DECLARE_6]]#0 : i32, !fir.ref<i32>
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           %[[NEW_CLI_2:.*]] = omp.new_cli
+! CHECK:           omp.fuse (%[[NEW_CLI_2]]) <- (%[[NEW_CLI_0]], %[[NEW_CLI_1]])
+! CHECK:           return
+! CHECK:         }
+
diff --git a/flang/test/Lower/OpenMP/fuse02.f90 b/flang/test/Lower/OpenMP/fuse02.f90
new file mode 100644
index 0000000000000..5a0f37827c36a
--- /dev/null
+++ b/flang/test/Lower/OpenMP/fuse02.f90
@@ -0,0 +1,123 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s | FileCheck %s
+
+
+subroutine omp_fuse02(lb1, ub1, inc1, lb2, ub2, inc2)
+  integer res, i, j, k
+  integer lb1, ub1, inc1
+  integer lb2, ub2, inc2
+
+  !$omp fuse looprange(2,2)
+  do i = lb1, ub1, inc1
+    res = i
+  end do
+  do j = lb2, ub2, inc2
+    res = j
+  end do
+  do k = lb1, ub2, inc1
+    res = k
+  end do
+  !$omp end fuse
+
+end subroutine omp_fuse02
+
+
+! CHECK-LABEL:   func.func @_QPomp_fuse02(
+! CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "lb1"},
+! CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "ub1"},
+! CHECK-SAME:      %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "inc1"},
+! CHECK-SAME:      %[[ARG3:.*]]: !fir.ref<i32> {fir.bindc_name = "lb2"},
+! CHECK-SAME:      %[[ARG4:.*]]: !fir.ref<i32> {fir.bindc_name = "ub2"},
+! CHECK-SAME:      %[[ARG5:.*]]: !fir.ref<i32> {fir.bindc_name = "inc2"}) {
+! CHECK:           %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[ALLOCA_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_fuse02Ei"}
+! CHECK:           %[[DECLARE_0:.*]]:2 = hlfir.declare %[[ALLOCA_0]] {uniq_name = "_QFomp_fuse02Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[DECLARE_1:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Einc1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[DECLARE_2:.*]]:2 = hlfir.declare %[[ARG5]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Einc2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[ALLOCA_1:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFomp_fuse02Ej"}
+! CHECK:           %[[DECLARE_3:.*]]:2 = hlfir.declare %[[ALLOCA_1]] {uniq_name = "_QFomp_fuse02Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[ALLOCA_2:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFomp_fuse02Ek"}
+! CHECK:           %[[DECLARE_4:.*]]:2 = hlfir.declare %[[ALLOCA_2]] {uniq_name = "_QFomp_fuse02Ek"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[DECLARE_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Elb1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[DECLARE_6:.*]]:2 = hlfir.declare %[[ARG3]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Elb2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[ALLOCA_3:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_fuse02Eres"}
+! CHECK:           %[[DECLARE_7:.*]]:2 = hlfir.declare %[[ALLOCA_3]] {uniq_name = "_QFomp_fuse02Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[DECLARE_8:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Eub1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[DECLARE_9:.*]]:2 = hlfir.declare %[[ARG4]] dummy_scope %[[DUMMY_SCOPE_0]] arg {{[0-9]+}} {uniq_name = "_QFomp_fuse02Eub2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[LOAD_0:.*]] = fir.load %[[DECLARE_5]]#0 : !fir.ref<i32>
+! CHECK:           %[[LOAD_1:.*]] = fir.load %[[DECLARE_8]]#0 : !fir.ref<i32>
+! CHECK:           %[[LOAD_2:.*]] = fir.load %[[DECLARE_1]]#0 : !fir.ref<i32>
+! CHECK:           %[[CONSTANT_0:.*]] = arith.constant 0 : i32
+! CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1 : i32
+! CHECK:           %[[CMPI_0:.*]] = arith.cmpi slt, %[[LOAD_2]], %[[CONSTANT_0]] : i32
+! CHECK:           %[[SUBI_0:.*]] = arith.subi %[[CONSTANT_0]], %[[LOAD_2]] : i32
+! CHECK:           %[[SELECT_0:.*]] = arith.select %[[CMPI_0]], %[[SUBI_0]], %[[LOAD_2]] : i32
+! CHECK:           %[[SELECT_1:.*]] = arith.select %[[CMPI_0]], %[[LOAD_1]], %[[LOAD_0]] : i32
+! CHECK:           %[[SELECT_2:.*]] = arith.select %[[CMPI_0]], %[[LOAD_0]], %[[LOAD_1]] : i32
+! CHECK:           %[[SUBI_1:.*]] = arith.subi %[[SELECT_2]], %[[SELECT_1]] overflow<nuw> : i32
+! CHECK:           %[[DIVUI_0:.*]] = arith.divui %[[SUBI_1]], %[[SELECT_0]] : i32
+! CHECK:           %[[ADDI_0:.*]] = arith.addi %[[DIVUI_0]], %[[CONSTANT_1]] overflow<nuw> : i32
+! CHECK:           %[[CMPI_1:.*]] = arith.cmpi slt, %[[SELECT_2]], %[[SELECT_1]] : i32
+! CHECK:           %[[SELECT_3:.*]] = arith.select %[[CMPI_1]], %[[CONSTANT_0]], %[[ADDI_0]] : i32
+! CHECK:           %[[NEW_CLI_0:.*]] = omp.new_cli
+! CHECK:           omp.canonical_loop(%[[NEW_CLI_0]]) %[[VAL_0:.*]] : i32 in range(%[[SELECT_3]]) {
+! CHECK:             %[[MULI_0:.*]] = arith.muli %[[VAL_0]], %[[LOAD_2]] : i32
+! CHECK:             %[[ADDI_1:.*]] = arith.addi %[[LOAD_0]], %[[MULI_0]] : i32
+! CHECK:             hlfir.assign %[[ADDI_1]] to %[[DECLARE_0]]#0 : i32, !fir.ref<i32>
+! CHECK:             %[[LOAD_3:.*]] = fir.load %[[DECLARE_0]]#0 : !fir.ref<i32>
+! CHECK:             hlfir.assign %[[LOAD_3]] to %[[DECLARE_7]]#0 : i32, !fir.ref<i32>
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           %[[LOAD_4:.*]] = fir.load %[[DECLARE_6]]#0 : !fir.ref<i32>
+! CHECK:           %[[LOAD_5:.*]] = fir.load %[[DECLARE_9]]#0 : !fir.ref<i32>
+! CHECK:           %[[LOAD_6:.*]] = fir.load %[[DECLARE_2]]#0 : !fir.ref<i32>
+! CHECK:           %[[CONSTANT_2:.*]] = arith.constant 0 : i32
+! CHECK:           %[[CONSTANT_3:.*]] = arith.constant 1 : i32
+! CHECK:           %[[CMPI_2:.*]] = arith.cmpi slt, %[[LOAD_6]], %[[CONSTANT_2]] : i32
+! CHECK:           %[[SUBI_2:.*]] = arith.subi %[[CONSTANT_2]], %[[LOAD_6]] : i32
+! CHECK:           %[[SELECT_4:.*]] = arith.select %[[CMPI_2]], %[[SUBI_2]], %[[LOAD_6]] : i32
+! CHECK:           %[[SELECT_5:.*]] = arith.select %[[CMPI_2]], %[[LOAD_5]], %[[LOAD_4]] : i32
+! CHECK:           %[[SELECT_6:.*]] = arith.select %[[CMPI_2]], %[[LOAD_4]], %[[LOAD_5]] : i32
+! CHECK:           %[[SUBI_3:.*]] = arith.subi %[[SELECT_6]], %[[SELECT_5]] overflow<nuw> : i32
+! CHECK:           %[[DIVUI_1:.*]] = arith.divui %[[SUBI_3]], %[[SELECT_4]] : i32
+! CHECK:           %[[ADDI_2:.*]] = arith.addi %[[DIVUI_1]], %[[CONSTANT_3]] overflow<nuw> : i32
+! CHECK:           %[[CMPI_3:.*]] = arith.cmpi slt, %[[SELECT_6]], %[[SELECT_5]] : i32
+! CHECK:           %[[SELECT_7:.*]] = arith.select %[[CMPI_3]], %[[CONSTANT_2]], %[[ADDI_2]] : i32
+! CHECK:           %[[NEW_CLI_1:.*]] = omp.new_cli
+! CHECK:           omp.canonical_loop(%[[NEW_CLI_1]]) %[[VAL_1:.*]] : i32 in range(%[[SELECT_7]]) {
+! CHECK:             %[[MULI_1:.*]] = arith.muli %[[VAL_1]], %[[LOAD_6]] : i32
+! CHECK:             %[[ADDI_3:.*]] = arith.addi %[[LOAD_4]], %[[MULI_1]] : i32
+! CHECK:             hlfir.assign %[[ADDI_3]] to %[[DECLARE_3]]#0 : i32, !fir.ref<i32>
+! CHECK:             %[[LOAD_7:.*]] = fir.load %[[DECLARE_3]]#0 : !fir.ref<i32>
+! CHECK:             hlfir.assign %[[LOAD_7]] to %[[DECLARE_7]]#0 : i32, !fir.ref<i32>
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           %[[LOAD_8:.*]] = fir.load %[[DECLARE_5]]#0 : !fir.ref<i32>
+! CHECK:           %[[LOAD_9:.*]] = fir.load %[[DECLARE_9]]#0 : !fir.ref<i32>
+! CHECK:           %[[LOAD_10:.*]] = fir.load %[[DECLARE_1]]#0 : !fir.ref<i32>
+! CHECK:           %[[CONSTANT_4:.*]] = arith.constant 0 : i32
+! CHECK:           %[[CONSTANT_5:.*]] = arith.constant 1 : i32
+! CHECK:           %[[CMPI_4:.*]] = arith.cmpi slt, %[[LOAD_10]], %[[CONSTANT_4]] : i32
+! CHECK:           %[[SUBI_4:.*]] = arith.subi %[[CONSTANT_4]], %[[LOAD_10]] : i32
+! CHECK:           %[[SELECT_8:.*]] = arith.select %[[CMPI_4]], %[[SUBI_4]], %[[LOAD_10]] : i32
+! CHECK:           %[[SELECT_9:.*]] = arith.select %[[CMPI_4]], %[[LOAD_9]], %[[LOAD_8]] : i32
+! CHECK:           %[[SELECT_10:.*]] = arith.select %[[CMPI_4]], %[[LOAD_8]], %[[LOAD_9]] : i32
+! CHECK:           %[[SUBI_5:.*]] = arith.subi %[[SELECT_10]], %[[SELECT_9]] overflow<nuw> : i32
+! CHECK:           %[[DIVUI_2:.*]] = arith.divui %[[SUBI_5]], %[[SELECT_8]] : i32
+! CHECK:           %[[ADDI_4:.*]] = arith.addi %[[DIVUI_2]], %[[CONSTANT_5]] overflow<nuw> : i32
+! CHECK:           %[[CMPI_5:.*]] = arith.cmpi slt, %[[SELECT_10]], %[[SELECT_9]] : i32
+! CHECK:           %[[SELECT_11:.*]] = arith.select %[[CMPI_5]], %[[CONSTANT_4]], %[[ADDI_4]] : i32
+! CHECK:           %[[NEW_CLI_2:.*]] = omp.new_cli
+! CHECK:           omp.canonical_loop(%[[NEW_CLI_2]]) %[[VAL_2:.*]] : i32 in range(%[[SELECT_11]]) {
+! CHECK:             %[[MULI_2:.*]] = arith.muli %[[VAL_2]], %[[LOAD_10]] : i32
+! CHECK:             %[[ADDI_5:.*]] = arith.addi %[[LOAD_8]], %[[MULI_2]] : i32
+! CHECK:             hlfir.assign %[[ADDI_5]] to %[[DECLARE_4]]#0 : i32, !fir.ref<i32>
+! CHECK:             %[[LOAD_11:.*]] = fir.load %[[DECLARE_4]]#0 : !fir.ref<i32>
+! CHECK:             hlfir.assign %[[LOAD_11]] to %[[DECLARE_7]]#0 : i32, !fir.ref<i32>
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           %[[NEW_CLI_3:.*]] = omp.new_cli
+! CHECK:           %[[NEW_CLI_4:.*]] = omp.new_cli
+! CHECK:           omp.fuse (%[[NEW_CLI_3]], %[[NEW_CLI_4]]) <- (%[[NEW_CLI_0]], %[[NEW_CLI_1]], %[[NEW_CLI_2]]) {count = 2 : i32, first = 2 : i32}
+! CHECK:           return
+! CHECK:         }
+
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 9f77c24d0b27b..d8f45a4c69059 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1272,6 +1272,59 @@ class OpenMPIRBuilder {
   tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
             ArrayRef<Value *> TileSizes);
 
+  /// Fuse a sequence of loops.
+  ///
+  /// Fuses the loops of \p Loops into a single canonical loop.
+  /// The merging of the loops is done in the following structure:
+  ///
+  /// Example:
+  /// \code
+  ///   for (int i = lb0; i < ub0; i += st0) // trip count is calculated as:
+  ///     body(i);                           // tc0 = ceil((ub0 - lb0) / st0)
+  ///   for (int j = lb1; j < ub1; j += st1)
+  ///     body(j);
+  ///
+  ///   ...
+  ///
+  ///   for (int k = lbk; k < ubk; k += stk)
+  ///     body(k);
+  /// \endcode
+  ///
+  /// After fusing the loops a single loop is left:
+  /// \code
+  /// for (fuse.index = 0; fuse.index < max(tc0, tc1, ... tck); ++fuse.index) {
+  ///    if (fuse.index < tc0){
+  ///      iv0 = lb0 + st0 * fuse.index;
+  ///      original.index0 = iv0
+  ///      body(0);
+  ///    }
+  ///    if (fuse.index < tc1){
+  ///      iv1 = lb1 + st1 * fuse.index;
+  ///      original.index1 = iv1
+  ///      body(1);
+  ///    }
+  ///
+  ///    ...
+  ///
+  ///    if (fuse.index < tck){
+  ///      ivk = lbk + stk * fuse.index;
+  ///      original.indexk = ivk
+  ///      body(k);
+  ///    }
+  /// }
+  /// \endcode
+  ///
+  ///
+  /// @param DL        Debug location for instructions added by fusion.
+  ///
+  /// @param Loops     Loops to fuse; must not be empty. The CanonicalLoopInfo
+  ///                  objects are invalidated by this method, i.e. should not
+  ///                  be used after fusion.
+  ///
+  /// \returns A single loop generated by the loop fusion
+  LLVM_ABI CanonicalLoopInfo *fuseLoops(DebugLoc DL,
+                                        ArrayRef<CanonicalLoopInfo *> Loops);
+
   /// Fully unroll a loop.
   ///
   /// Instead of unrolling the loop immediately (and duplicating its body
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index ac86fa859967e..a3cb98456b249 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5806,6 +5806,117 @@ static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup,
   }
 }
 
+/// Fuse the canonical loops in \p Loops into one canonical loop that runs
+/// max(trip counts) iterations. Each original body is guarded by a check of
+/// the fused induction variable against that loop's own trip count.
+///
+/// NOTE(review): the trip counts are compared as signed values below, while
+/// the lit test shows the generated loop condition using `icmp ult`; confirm
+/// the intended signedness for trip counts >= 2^31.
+CanonicalLoopInfo *
+OpenMPIRBuilder::fuseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops) {
+  assert(!Loops.empty() && "Must pass at least one loop to fuse");
+  CanonicalLoopInfo *FirstLoop = Loops.front();
+  CanonicalLoopInfo *LastLoop = Loops.back();
+  Function *F = FirstLoop->getPreheader()->getParent();
+
+  // Collect the loop control blocks that will become orphaned later and the
+  // original trip counts while the input loops are still valid.
+  SmallVector<BasicBlock *> OldControlBBs;
+  SmallVector<Value *> OrigTripCounts;
+  for (CanonicalLoopInfo *L : Loops) {
+    assert(L->isValid() && "All input loops must be valid canonical loops");
+    assert(L->getIndVarType() == FirstLoop->getIndVarType() &&
+           "All loops to fuse must have the same induction variable type");
+    L->collectControlBlocks(OldControlBBs);
+    OrigTripCounts.push_back(L->getTripCount());
+  }
+
+  Builder.SetCurrentDebugLocation(DL);
+
+  // Compute max trip count.
+  // The fused loop will be from 0 to max(OrigTripCounts)
+  BasicBlock *TCBlock = BasicBlock::Create(F->getContext(), "omp.fuse.comp.tc",
+                                           F, FirstLoop->getHeader());
+  Builder.SetInsertPoint(TCBlock);
+  Value *FusedTripCount = OrigTripCounts.front();
+  for (Value *OrigTripCount : ArrayRef<Value *>(OrigTripCounts).drop_front()) {
+    Value *CondTC = Builder.CreateICmpSGT(FusedTripCount, OrigTripCount);
+    FusedTripCount = Builder.CreateSelect(CondTC, FusedTripCount, OrigTripCount,
+                                          Twine(".omp.fuse.tc"));
+  }
+
+  // Generate new loop
+  CanonicalLoopInfo *Fused =
+      createLoopSkeleton(DL, FusedTripCount, F, FirstLoop->getBody(),
+                         LastLoop->getLatch(), "fused");
+
+  // Replace original loops with the fused loop
+  // Preheader and After are not considered inside the CLI.
+  // These are used to compute the individual TCs of the loops
+  // so they have to be put before the resulting fused loop.
+  // Moving them up for readability.
+  for (CanonicalLoopInfo *L : Loops.drop_back()) {
+    L->getPreheader()->moveBefore(TCBlock);
+    L->getAfter()->moveBefore(TCBlock);
+  }
+  LastLoop->getPreheader()->moveBefore(TCBlock);
+
+  // Chain preheader -> after -> next preheader ... -> TCBlock -> fused loop.
+  for (size_t I = 0, E = Loops.size() - 1; I < E; ++I) {
+    redirectTo(Loops[I]->getPreheader(), Loops[I]->getAfter(), DL);
+    redirectTo(Loops[I]->getAfter(), Loops[I + 1]->getPreheader(), DL);
+  }
+  redirectTo(LastLoop->getPreheader(), TCBlock, DL);
+  redirectTo(TCBlock, Fused->getPreheader(), DL);
+  redirectTo(Fused->getAfter(), LastLoop->getAfter(), DL);
+
+  // Build the fused body
+  // Create new Blocks with conditions that jump to the original loop bodies
+  SmallVector<BasicBlock *> CondBBs;
+  SmallVector<Value *> CondValues;
+  for (size_t I = 0, E = Loops.size(); I < E; ++I) {
+    BasicBlock *CondBlock = BasicBlock::Create(
+        F->getContext(), "omp.fused.inner.cond", F, Loops[I]->getBody());
+    Builder.SetInsertPoint(CondBlock);
+    Value *CondValue =
+        Builder.CreateICmpSLT(Fused->getIndVar(), OrigTripCounts[I]);
+    CondBBs.push_back(CondBlock);
+    CondValues.push_back(CondValue);
+  }
+
+  // Join the condition blocks with the bodies of the original loops: each
+  // body runs only while the fused IV is below its own trip count and then
+  // continues at the next condition block; the last one continues at the
+  // latch of the fused loop.
+  redirectTo(Fused->getBody(), CondBBs.front(), DL);
+  for (size_t I = 0, E = Loops.size(); I < E; ++I) {
+    BasicBlock *Next = I + 1 < E ? CondBBs[I + 1] : Fused->getLatch();
+    Builder.SetInsertPoint(CondBBs[I]);
+    Builder.CreateCondBr(CondValues[I], Loops[I]->getBody(), Next);
+    redirectAllPredecessorsTo(Loops[I]->getLatch(), Next, DL);
+    // Replace the IV with the fused IV
+    Loops[I]->getIndVar()->replaceAllUsesWith(Fused->getIndVar());
+  }
+
+  // The loop latch must have only one predecessor. Currently it is branched to
+  // from both the last condition block and the last loop body
+  Fused->getLatch()->splitBasicBlock(Fused->getLatch()->begin(),
+                                     "omp.fused.pre_latch", /*Before=*/true);
+
+  // Remove unused parts
+  removeUnusedBlocksFromParent(OldControlBBs);
+
+  // Invalidate old CLIs
+  for (CanonicalLoopInfo *L : Loops)
+    L->invalidate();
+
+#ifndef NDEBUG
+  Fused->assertOK();
+#endif
+  return Fused;
+}
+
 void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) {
   LLVMContext &Ctx = Builder.getContext();
   addLoopMetadata(
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 377f1febf6b8f..2752c2a806847 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -550,6 +550,40 @@ def TileOp : OpenMPTransformBase_Op<"tile",
   let hasVerifier = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// OpenMP fuse operation
+//===----------------------------------------------------------------------===//
+
+def FuseOp : OpenMPTransformBase_Op<"fuse"> {
+  let summary = "OpenMP fuse operation";
+  let description = [{
+    Represents the OpenMP fuse directive introduced in OpenMP 6.0.
+
+    The construct takes a loop sequence and merges the loops specified by the
+    first and count attributes. It generates a loop sequence consisting of the
+    loops before the first attribute untouched, the generated fused loop, and
+    the loops from first + count onwards untouched, maintaining the original
+    order. If no attributes are specified all the loops in the sequence are
+    fused generating a single loop.
+    Each logical iteration of the fused loop executes a logical iteration of
+    each affected loop. The fused loop has the number of logical iterations
+    equal to the affected loop with most logical iterations.
+
+    The first (1-based) and count attributes are constant and known beforehand.
+  }]#clausesDescription;
+
+  let extraClassDeclaration = [{
+    IntegerAttr getFirst() {
+      return this->getOperation()->getAttrOfType<mlir::IntegerAttr>("first");
+    }
+    IntegerAttr getCount() {
+      return this->getOperation()->getAttrOfType<mlir::IntegerAttr>("count");
+    }
+  }]#clausesExtraClassDeclaration;
+
+  let hasVerifier = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // 2.8.3 Workshare Construct
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 1b069c62a8be9..8373a18df281a 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -3429,6 +3429,20 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
             .Case([&](UnrollHeuristicOp op) -> std::string {
               llvm_unreachable("heuristic unrolling does not generate a loop");
             })
+            .Case([&](FuseOp op) -> std::string {
+              unsigned int first = 0;
+              unsigned int count = 0;
+              if (op.getFirst() && op.getCount()) {
+                first = op.getFirst().getInt();
+                count = op.getCount().getInt();
+              }
+              unsigned opnum = generator->getOperandNumber();
+              if ((first != 0 && opnum <= first - 1) ||
+                  (count != 0 && opnum >= first + 1))
+                return "canonloop_fuse";
+              else
+                return "fused";
+            })
             .Case([&](TileOp op) -> std::string {
               auto [generateesFirst, generateesCount] =
                   op.getGenerateesODSOperandIndexAndLength();
@@ -3804,6 +3818,60 @@ std::pair<unsigned, unsigned> TileOp::getGenerateesODSOperandIndexAndLength() {
   return getODSOperandIndexAndLength(odsIndex_generatees);
 }
 
+//===----------------------------------------------------------------------===//
+// FuseOp
+//===----------------------------------------------------------------------===//
+
+/// Prints "(generatees) <- (applyees)"; an empty list is omitted entirely.
+static void printLoopTransformClis(OpAsmPrinter &p, FuseOp op,
+                                   OperandRange generatees,
+                                   OperandRange applyees) {
+  if (!generatees.empty())
+    p << '(' << llvm::interleaved(generatees) << ')';
+  if (!applyees.empty())
+    p << " <- (" << llvm::interleaved(applyees) << ')';
+}
+
+/// Checks operand counts against first/count and that applyees are loops.
+LogicalResult FuseOp::verify() {
+  if (getApplyees().size() < 2)
+    return emitOpError() << "must apply to at least two loops";
+  if (getFirst() && getCount()) {
+    // Signed 64-bit math: negative or huge values must not wrap around.
+    int64_t first = getFirst().getInt();
+    int64_t count = getCount().getInt();
+    int64_t numApplyees = getApplyees().size();
+    if (first < 1 || count < 1)
+      return emitOpError() << "first and count attributes must be positive";
+    if (first > numApplyees || count > numApplyees - first + 1)
+      return emitOpError() << "the numbers of applyees must be at least first "
+                              "minus one plus count attributes";
+    if (!getGeneratees().empty() &&
+        getGeneratees().size() != static_cast<size_t>(numApplyees + 1 - count))
+      return emitOpError() << "the number of generatees must be the number of "
+                              "aplyees plus one minus count";
+  } else if (!getGeneratees().empty() && getGeneratees().size() != 1) {
+    return emitOpError()
+           << "in a complete fuse the number of generatees must be exactly 1";
+  }
+  for (Value applyee : getApplyees()) {
+    auto [create, gen, cons] = decodeCli(applyee);
+    if (!gen)
+      return emitOpError() << "applyee CLI has no generator";
+    if (!dyn_cast_or_null<CanonicalLoopOp>(gen->getOwner()))
+      return emitOpError()
+             << "currently only supports omp.canonical_loop as applyee";
+  }
+  return success();
+}
+std::pair<unsigned, unsigned> FuseOp::getApplyeesODSOperandIndexAndLength() {
+  return getODSOperandIndexAndLength(odsIndex_applyees); // {index, length}
+}
+
+std::pair<unsigned, unsigned> FuseOp::getGenerateesODSOperandIndexAndLength() {
+  return getODSOperandIndexAndLength(odsIndex_generatees); // {index, length}
+}
+
 //===----------------------------------------------------------------------===//
 // Critical construct (2.17.1)
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8edec990eaaba..e6880ce33b061 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3207,6 +3207,57 @@ static LogicalResult applyTile(omp::TileOp op, llvm::IRBuilderBase &builder,
   return success();
 }
 
+/// Apply a `#pragma omp fuse` / `!$omp fuse` transformation using the
+/// OpenMPIRBuilder. Applyees outside the 1-based [first, first + count) range
+/// are passed through unchanged; without both attributes all loops are fused.
+static LogicalResult applyFuse(omp::FuseOp op, llvm::IRBuilderBase &builder,
+                               LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  llvm::OpenMPIRBuilder::LocationDescription loc(builder);
+
+  unsigned int first = 0;
+  unsigned int count = 0;
+  if (op.getFirst() && op.getCount()) {
+    first = op.getFirst().getInt();
+    count = op.getCount().getInt();
+  }
+
+  // Select what CLIs are going to be fused
+  SmallVector<llvm::CanonicalLoopInfo *> beforeFuse, toFuse, afterFuse;
+  for (auto [i, applyee] : llvm::enumerate(op.getApplyees())) {
+    llvm::CanonicalLoopInfo *consBuilderCLI =
+        moduleTranslation.lookupOMPLoop(applyee);
+    assert(consBuilderCLI && "Canonical loop must already been translated");
+    if (first != 0 && i < first - 1)
+      beforeFuse.push_back(consBuilderCLI);
+    else if (count != 0 && i >= first + count - 1)
+      afterFuse.push_back(consBuilderCLI);
+    else
+      toFuse.push_back(consBuilderCLI);
+  }
+  assert(
+      (op.getGeneratees().empty() ||
+       beforeFuse.size() + afterFuse.size() + 1 == op.getGeneratees().size()) &&
+      "Wrong number of generatees");
+
+  // Fuse; generatees are ordered: loops before, the fused loop, loops after.
+  auto generatedLoop = ompBuilder->fuseLoops(loc.DL, toFuse);
+  if (!op.getGeneratees().empty()) {
+    size_t gen = 0; // Index of the next generatee to map.
+    for (llvm::CanonicalLoopInfo *cli : beforeFuse)
+      moduleTranslation.mapOmpLoop(op.getGeneratees()[gen++], cli);
+    moduleTranslation.mapOmpLoop(op.getGeneratees()[gen++], generatedLoop);
+    for (llvm::CanonicalLoopInfo *cli : afterFuse)
+      moduleTranslation.mapOmpLoop(op.getGeneratees()[gen++], cli);
+  }
+
+  // CLIs can only be consumed once
+  for (Value applyee : op.getApplyees())
+    moduleTranslation.invalidateOmpLoop(applyee);
+
+  return success();
+}
+
 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
 static llvm::AtomicOrdering
 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
@@ -6288,6 +6339,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
           .Case([&](omp::TileOp op) {
             return applyTile(op, builder, moduleTranslation);
           })
+          .Case([&](omp::FuseOp op) {
+            return applyFuse(op, builder, moduleTranslation);
+          })
           .Case([&](omp::TargetAllocMemOp) {
             return convertTargetAllocMemOp(*op, builder, moduleTranslation);
           })
diff --git a/mlir/test/Dialect/OpenMP/cli-fuse.mlir b/mlir/test/Dialect/OpenMP/cli-fuse.mlir
new file mode 100644
index 0000000000000..284b8c914ae1f
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/cli-fuse.mlir
@@ -0,0 +1,114 @@
+// RUN: mlir-opt %s            | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
+
+
+// Generic-form input check (mlir-opt always prints the pretty form back)
+// CHECK-LABEL: @omp_fuse_raw(
+// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32) {
+func.func @omp_fuse_raw(%tc1 : i32, %tc2 : i32) -> () {
+  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  %canonloop_s0 = "omp.new_cli" () : () -> (!omp.cli)
+  // CHECK-NEXT: %canonloop_s1 = omp.new_cli
+  %canonloop_s1 = "omp.new_cli" () : () -> (!omp.cli)
+  // CHECK-NEXT: %fused = omp.new_cli
+  %fused = "omp.new_cli" () : () -> (!omp.cli)
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc1]]) {
+  "omp.canonical_loop" (%tc1, %canonloop_s0) ({
+    ^bb0(%iv_s0: i32):
+      // CHECK: omp.terminator
+      omp.terminator
+  }) : (i32, !omp.cli) -> ()
+  // CHECK: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc2]]) {
+  "omp.canonical_loop" (%tc2, %canonloop_s1) ({
+    ^bb0(%iv_s1: i32):
+      // CHECK: omp.terminator
+      omp.terminator
+  }) : (i32, !omp.cli) -> ()
+  // CHECK: omp.fuse (%fused) <- (%canonloop_s0, %canonloop_s1)
+  "omp.fuse"(%fused,  %canonloop_s0, %canonloop_s1) <{operandSegmentSizes = array<i32: 1, 2>}> : (!omp.cli,  !omp.cli, !omp.cli) -> ()
+  return
+}
+
+// Pretty syntax check: complete fuse of two loops with an explicit generatee
+// CHECK-LABEL: @omp_fuse_pretty(
+// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32) {
+func.func @omp_fuse_pretty(%tc1 : i32, %tc2 : i32) -> () {
+  // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+  %canonloop_s0 = omp.new_cli
+  // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+  %canonloop_s1 = omp.new_cli
+  // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+  %fused = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc1]]) {
+  omp.canonical_loop (%canonloop_s0) %iv_s0 : i32 in range(%tc1) {
+      // CHECK: omp.terminator
+      omp.terminator
+  }
+  // CHECK: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc2]]) {
+  omp.canonical_loop (%canonloop_s1) %iv_s1 : i32 in range(%tc2) {
+      // CHECK: omp.terminator
+      omp.terminator
+  }
+  // CHECK: omp.fuse (%fused) <- (%canonloop_s0, %canonloop_s1)
+  omp.fuse(%fused) <- (%canonloop_s0, %canonloop_s1) 
+  return
+}
+
+// The generatee list of omp.fuse is optional and may be omitted entirely
+// CHECK-LABEL: @omp_fuse_optionalgen_pretty(
+// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32) {
+func.func @omp_fuse_optionalgen_pretty(%tc1 : i32, %tc2 : i32) -> () {
+  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  %canonloop_s0 = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc1]]) {
+  omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc1) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  // CHECK: %canonloop_s1 = omp.new_cli
+  %canonloop_s1 = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc2]]) {
+  omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc2) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  // CHECK: omp.fuse <- (%canonloop_s0, %canonloop_s1)
+  omp.fuse <- (%canonloop_s0, %canonloop_s1)
+  return
+}
+
+// Fuse with looprange attributes
+// CHECK-LABEL: @omp_fuse_looprange(
+// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32, %[[tc3:.+]]: i32) {
+func.func @omp_fuse_looprange(%tc1 : i32, %tc2 : i32, %tc3 : i32) -> () {
+  // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+  %canonloop_s0 = omp.new_cli
+  // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+  %canonloop_s1 = omp.new_cli
+  // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+  %canonloop_s2 = omp.new_cli
+  // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+  %canonloop_fuse = omp.new_cli
+  // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+  %fused = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc1]]) {
+  omp.canonical_loop (%canonloop_s0) %iv_s0 : i32 in range(%tc1) {
+      // CHECK: omp.terminator
+      omp.terminator
+  }
+  // CHECK: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc2]]) {
+  omp.canonical_loop (%canonloop_s1) %iv_s1 : i32 in range(%tc2) {
+      // CHECK: omp.terminator
+      omp.terminator
+  }
+  // CHECK: omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%[[tc3]]) {
+  omp.canonical_loop (%canonloop_s2) %iv_s2 : i32 in range(%tc3) {
+      // CHECK: omp.terminator
+      omp.terminator
+  }
+  // CHECK: omp.fuse (%canonloop_fuse, %fused) <- (%canonloop_s0,
+  // CHECK-SAME: %canonloop_s1, %canonloop_s2) {count = 2 : i32, first = 1 : i32}
+  omp.fuse(%fused, %canonloop_fuse) <- (%canonloop_s0, %canonloop_s1, %canonloop_s2) {count = 2 : i32, first = 1 : i32}
+  return
+}
+
diff --git a/mlir/test/Dialect/OpenMP/invalid-fuse.mlir b/mlir/test/Dialect/OpenMP/invalid-fuse.mlir
new file mode 100644
index 0000000000000..d763ffcea71a2
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/invalid-fuse.mlir
@@ -0,0 +1,100 @@
+// RUN: mlir-opt -split-input-file -verify-diagnostics %s
+
+
+func.func @no_loops(%tc1 : i32, %tc2 : i32) {
+  // expected-error@+1 {{'omp.fuse' op must apply to at least two loops}}
+  omp.fuse <-()
+
+  return
+}
+
+// -----
+
+func.func @one_loop(%tc1 : i32, %tc2 : i32) {
+  %canonloop = omp.new_cli
+  omp.canonical_loop(%canonloop) %iv : i32 in range(%tc1) {
+    omp.terminator
+  }
+  // expected-error@+1 {{'omp.fuse' op must apply to at least two loops}}
+  omp.fuse <-(%canonloop)
+
+  return
+}
+
+// -----
+
+func.func @missing_generator(%tc1 : i32, %tc2 : i32) {
+  // expected-error@+1 {{'omp.new_cli' op CLI has no generator}}
+  %canonloop = omp.new_cli
+
+  // expected-note@+1 {{see consumer here: "omp.fuse"(%0) <{operandSegmentSizes = array<i32: 0, 1>}> : (!omp.cli) -> ()}}
+  omp.fuse <-(%canonloop)
+
+  return
+}
+
+// -----
+
+func.func @wrong_generatees1(%tc1 : i32, %tc2 : i32) {
+  %canonloop1 = omp.new_cli
+  %canonloop2 = omp.new_cli
+  omp.canonical_loop(%canonloop1) %iv : i32 in range(%tc1) {
+    omp.terminator
+  }
+  omp.canonical_loop(%canonloop2) %iv : i32 in range(%tc2) {
+    omp.terminator
+  }
+
+  %fused1 = omp.new_cli
+  %fused2 = omp.new_cli
+  // expected-error@+1 {{'omp.fuse' op in a complete fuse the number of generatees must be exactly 1}}
+  omp.fuse (%fused1, %fused2) <-(%canonloop1, %canonloop2)
+
+  llvm.return
+}
+
+// -----
+
+func.func @wrong_generatees2(%tc1 : i32, %tc2 : i32, %tc3 : i32) {
+  %canonloop1 = omp.new_cli
+  %canonloop2 = omp.new_cli
+  %canonloop3 = omp.new_cli
+  omp.canonical_loop(%canonloop1) %iv : i32 in range(%tc1) {
+    omp.terminator
+  }
+  omp.canonical_loop(%canonloop2) %iv : i32 in range(%tc2) {
+    omp.terminator
+  }
+  omp.canonical_loop(%canonloop3) %iv : i32 in range(%tc3) {
+    omp.terminator
+  }
+
+  %fused = omp.new_cli
+  // expected-error@+1 {{'omp.fuse' op the number of generatees must be the number of aplyees plus one minus count}}
+  omp.fuse (%fused) <-(%canonloop1, %canonloop2, %canonloop3) {first = 1 : i32, count = 2 : i32}
+
+  llvm.return
+}
+// -----
+func.func @wrong_applyees(%tc1 : i32, %tc2 : i32, %tc3 : i32) {
+  %canonloop1 = omp.new_cli
+  %canonloop2 = omp.new_cli
+  %canonloop3 = omp.new_cli
+  omp.canonical_loop(%canonloop1) %iv : i32 in range(%tc1) {
+    omp.terminator
+  }
+  omp.canonical_loop(%canonloop2) %iv : i32 in range(%tc2) {
+    omp.terminator
+  }
+  omp.canonical_loop(%canonloop3) %iv : i32 in range(%tc3) {
+    omp.terminator
+  }
+
+  %fused = omp.new_cli
+  %canonloop_fuse = omp.new_cli
+  // expected-error@+1 {{'omp.fuse' op the numbers of applyees must be at least first minus one plus count attributes}}
+  omp.fuse (%fused, %canonloop_fuse) <-(%canonloop1, %canonloop2, %canonloop3) {first = 1 : i32, count = 5 : i32}
+
+  llvm.return
+}
+
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-fuse01.mlir b/mlir/test/Target/LLVMIR/openmp-cli-fuse01.mlir
new file mode 100644
index 0000000000000..0754572b24771
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-cli-fuse01.mlir
@@ -0,0 +1,100 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --enable-var-scope
+
+// Complete fuse: without first/count attributes both loops are fused into one.
+llvm.func @fuse_trivial_loops(%baseptr: !llvm.ptr, %tc1: i32, %tc2: i32) -> () {
+  %literal_cli1 = omp.new_cli
+  omp.canonical_loop(%literal_cli1) %iv1 : i32 in range(%tc1) {
+    %ptr = llvm.getelementptr inbounds %baseptr[%iv1] : (!llvm.ptr, i32) -> !llvm.ptr, f32
+    %val = llvm.mlir.constant(42.0 : f32) : f32
+    llvm.store %val, %ptr : f32, !llvm.ptr
+    omp.terminator
+  }
+  %literal_cli2 = omp.new_cli
+  omp.canonical_loop(%literal_cli2) %iv2 : i32 in range(%tc2) {
+    %ptr = llvm.getelementptr inbounds %baseptr[%iv2] : (!llvm.ptr, i32) -> !llvm.ptr, f32
+    %val = llvm.mlir.constant(21.0 : f32) : f32
+    llvm.store %val, %ptr : f32, !llvm.ptr
+    omp.terminator
+  }
+  omp.fuse <- (%literal_cli1, %literal_cli2)
+  llvm.return
+}
+
+// CHECK-LABEL:    define void @fuse_trivial_loops(
+// CHECK-SAME:       ptr %[[VAL_11:.+]], i32 %[[VAL_5:.+]], i32 %[[VAL_16:.+]]) {
+// CHECK-NEXT:       br label %[[OMP_OMP_LOOP_PREHEADER:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_OMP_LOOP_PREHEADER]]:
+// CHECK-NEXT:       br label %[[OMP_OMP_LOOP_AFTER:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_OMP_LOOP_AFTER]]:
+// CHECK-NEXT:       br label %[[OMP_OMP_LOOP_PREHEADER1:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_OMP_LOOP_PREHEADER1]]:
+// CHECK-NEXT:       br label %[[OMP_FUSE_COMP_TC:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_FUSE_COMP_TC]]:
+// CHECK-NEXT:       %[[VAL_15:.+]] = icmp sgt i32 %[[VAL_5:.+]], %[[VAL_16:.+]]
+// CHECK-NEXT:       %[[VAL_17:.+]] = select i1 %[[VAL_15:.+]], i32 %[[VAL_5:.+]], i32 %[[VAL_16:.+]]
+// CHECK-NEXT:       br label %[[OMP_FUSED_PREHEADER:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_FUSED_PREHEADER]]:
+// CHECK-NEXT:       br label %[[OMP_FUSED_HEADER:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_FUSED_HEADER]]:
+// CHECK-NEXT:       %[[VAL_4:.+]] = phi i32 [ 0, %[[VAL_18:.+]] ], [ %[[VAL_27:.+]], %[[VAL_26:.+]] ]
+// CHECK-NEXT:       br label %[[OMP_FUSED_COND:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_FUSED_COND]]:
+// CHECK-NEXT:       %[[VAL_29:.+]] = icmp ult i32 %[[VAL_4:.+]], %[[VAL_17:.+]]
+// CHECK-NEXT:       br i1 %[[VAL_29:.+]], label %[[OMP_FUSED_BODY:.+]], label %[[OMP_FUSED_EXIT:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_FUSED_BODY]]:
+// CHECK-NEXT:       br label %[[OMP_FUSED_INNER_COND:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_FUSED_INNER_COND]]:
+// CHECK-NEXT:       %[[VAL_3:.+]] = icmp slt i32 %[[VAL_4:.+]], %[[VAL_5:.+]]
+// CHECK-NEXT:       br i1 %[[VAL_3:.+]], label %[[OMP_OMP_LOOP_BODY:.+]], label %[[OMP_FUSED_INNER_COND13:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_OMP_LOOP_BODY]]:
+// CHECK-NEXT:       br label %[[OMP_LOOP_REGION:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_LOOP_REGION]]:
+// CHECK-NEXT:       %[[VAL_10:.+]] = getelementptr inbounds float, ptr %[[VAL_11:.+]], i32 %[[VAL_4:.+]]
+// CHECK-NEXT:       store float 4.200000e+01, ptr %[[VAL_10:.+]], align 4
+// CHECK-NEXT:       br label %[[OMP_REGION_CONT:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_REGION_CONT]]:
+// CHECK-NEXT:       br label %[[OMP_FUSED_INNER_COND13:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_FUSED_INNER_COND13]]:
+// CHECK-NEXT:       %[[VAL_19:.+]] = icmp slt i32 %[[VAL_4:.+]], %[[VAL_16:.+]]
+// CHECK-NEXT:       br i1 %[[VAL_19:.+]], label %[[OMP_OMP_LOOP_BODY4:.+]], label %[[OMP_FUSED_PRE_LATCH:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_OMP_LOOP_BODY4]]:
+// CHECK-NEXT:       br label %[[OMP_LOOP_REGION12:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_LOOP_REGION12]]:
+// CHECK-NEXT:       %[[VAL_23:.+]] = getelementptr inbounds float, ptr %[[VAL_11:.+]], i32 %[[VAL_4:.+]]
+// CHECK-NEXT:       store float 2.100000e+01, ptr %[[VAL_23:.+]], align 4
+// CHECK-NEXT:       br label %[[OMP_REGION_CONT11:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_REGION_CONT11]]:
+// CHECK-NEXT:       br label %[[OMP_FUSED_PRE_LATCH:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_FUSED_PRE_LATCH]]:
+// CHECK-NEXT:       br label %[[OMP_FUSED_INC:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_FUSED_INC]]:
+// CHECK-NEXT:       %[[VAL_27:.+]] = add nuw i32 %[[VAL_4:.+]], 1
+// CHECK-NEXT:       br label %[[OMP_FUSED_HEADER:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_FUSED_EXIT]]:
+// CHECK-NEXT:       br label %[[OMP_FUSED_AFTER:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_FUSED_AFTER]]:
+// CHECK-NEXT:       br label %[[OMP_OMP_LOOP_AFTER7:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:     [[OMP_OMP_LOOP_AFTER7]]:
+// CHECK-NEXT:       ret void
+
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-fuse02.mlir b/mlir/test/Target/LLVMIR/openmp-cli-fuse02.mlir
new file mode 100644
index 0000000000000..0032bd86501d0
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-cli-fuse02.mlir
@@ -0,0 +1,140 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --enable-var-scope
+
+
+llvm.func @fuse_looprange_loops(%baseptr: !llvm.ptr, %tc1: i32, %tc2: i32, %tc3: i32) -> () { // Test input: three canonical loops with trip counts %tc1..%tc3, each storing a distinct constant through %baseptr.
+  %literal_cli1 = omp.new_cli
+  omp.canonical_loop(%literal_cli1) %iv1 : i32 in range(%tc1) { // Loop 1: stores 42.0 to baseptr[iv1].
+    %ptr = llvm.getelementptr inbounds %baseptr[%iv1] : (!llvm.ptr, i32) -> !llvm.ptr, f32
+    %val = llvm.mlir.constant(42.0 : f32) : f32
+    llvm.store %val, %ptr : f32, !llvm.ptr
+    omp.terminator
+  }
+  %literal_cli2 = omp.new_cli
+  omp.canonical_loop(%literal_cli2) %iv2 : i32 in range(%tc2) { // Loop 2: stores 21.0 to baseptr[iv2].
+    %ptr = llvm.getelementptr inbounds %baseptr[%iv2] : (!llvm.ptr, i32) -> !llvm.ptr, f32
+    %val = llvm.mlir.constant(21.0 : f32) : f32
+    llvm.store %val, %ptr : f32, !llvm.ptr
+    omp.terminator
+  }
+  %literal_cli3 = omp.new_cli
+  omp.canonical_loop(%literal_cli3) %iv3 : i32 in range(%tc3) { // Loop 3: stores 63.0 to baseptr[iv3]; the expected IR below keeps this as a separate loop after the fused one.
+    %ptr = llvm.getelementptr inbounds %baseptr[%iv3] : (!llvm.ptr, i32) -> !llvm.ptr, f32
+    %val = llvm.mlir.constant(63.0 : f32) : f32
+    llvm.store %val, %ptr : f32, !llvm.ptr
+    omp.terminator
+  }
+  omp.fuse <- (%literal_cli1, %literal_cli2, %literal_cli3) {first = 1 : i32, count = 2 : i32} // Presumably first/count is a 1-based loop range (TODO confirm against the op definition); the expected IR fuses the 42.0 and 21.0 stores into one loop.
+  llvm.return
+}
+
+
+// CHECK-LABEL:   define void @fuse_looprange_loops(
+// CHECK-SAME:      ptr %[[VAL_23:.+]], i32 %[[VAL_5:.+]], i32 %[[VAL_6:.+]], i32 %[[VAL_40:.+]]) {
+// CHECK-NEXT:      br label %[[OMP_OMP_LOOP_PREHEADER:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_OMP_LOOP_PREHEADER]]:
+// CHECK-NEXT:      br label %[[OMP_OMP_LOOP_AFTER:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_OMP_LOOP_AFTER]]:
+// CHECK-NEXT:      br label %[[OMP_OMP_LOOP_PREHEADER1:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_OMP_LOOP_PREHEADER1]]:
+// CHECK-NEXT:      br label %[[OMP_FUSE_COMP_TC:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_FUSE_COMP_TC]]:
+// CHECK-NEXT:      %[[VAL_4:.+]] = icmp sgt i32 %[[VAL_5:.+]], %[[VAL_6:.+]]
+// CHECK-NEXT:      %[[VAL_7:.+]] = select i1 %[[VAL_4:.+]], i32 %[[VAL_5:.+]], i32 %[[VAL_6:.+]]
+// CHECK-NEXT:      br label %[[OMP_FUSED_PREHEADER:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_FUSED_PREHEADER]]:
+// CHECK-NEXT:      br label %[[OMP_FUSED_HEADER:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_FUSED_HEADER]]:
+// CHECK-NEXT:      %[[VAL_11:.+]] = phi i32 [ 0, %[[VAL_8:.+]] ], [ %[[VAL_12:.+]], %[[VAL_10:.+]] ]
+// CHECK-NEXT:      br label %[[OMP_FUSED_COND:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_FUSED_COND]]:
+// CHECK-NEXT:      %[[VAL_14:.+]] = icmp ult i32 %[[VAL_11:.+]], %[[VAL_7:.+]]
+// CHECK-NEXT:      br i1 %[[VAL_14:.+]], label %[[OMP_FUSED_BODY:.+]], label %[[OMP_FUSED_EXIT:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_FUSED_BODY]]:
+// CHECK-NEXT:      br label %[[OMP_FUSED_INNER_COND:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_FUSED_INNER_COND]]:
+// CHECK-NEXT:      %[[VAL_18:.+]] = icmp slt i32 %[[VAL_11:.+]], %[[VAL_5:.+]]
+// CHECK-NEXT:      br i1 %[[VAL_18:.+]], label %[[OMP_OMP_LOOP_BODY:.+]], label %[[OMP_FUSED_INNER_COND25:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_OMP_LOOP_BODY]]:
+// CHECK-NEXT:      br label %[[OMP_LOOP_REGION:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_LOOP_REGION]]:
+// CHECK-NEXT:      %[[VAL_22:.+]] = getelementptr inbounds float, ptr %[[VAL_23:.+]], i32 %[[VAL_11:.+]]
+// CHECK-NEXT:      store float 4.200000e+01, ptr %[[VAL_22:.+]], align 4
+// CHECK-NEXT:      br label %[[OMP_REGION_CONT:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_REGION_CONT]]:
+// CHECK-NEXT:      br label %[[OMP_FUSED_INNER_COND25:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_FUSED_INNER_COND25]]:
+// CHECK-NEXT:      %[[VAL_25:.+]] = icmp slt i32 %[[VAL_11:.+]], %[[VAL_6:.+]]
+// CHECK-NEXT:      br i1 %[[VAL_25:.+]], label %[[OMP_OMP_LOOP_BODY4:.+]], label %[[OMP_FUSED_PRE_LATCH:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_OMP_LOOP_BODY4]]:
+// CHECK-NEXT:      br label %[[OMP_LOOP_REGION12:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_LOOP_REGION12]]:
+// CHECK-NEXT:      %[[VAL_29:.+]] = getelementptr inbounds float, ptr %[[VAL_23:.+]], i32 %[[VAL_11:.+]]
+// CHECK-NEXT:      store float 2.100000e+01, ptr %[[VAL_29:.+]], align 4
+// CHECK-NEXT:      br label %[[OMP_REGION_CONT11:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_REGION_CONT11]]:
+// CHECK-NEXT:      br label %[[OMP_FUSED_PRE_LATCH:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_FUSED_PRE_LATCH]]:
+// CHECK-NEXT:      br label %[[OMP_FUSED_INC:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_FUSED_INC]]:
+// CHECK-NEXT:      %[[VAL_12:.+]] = add nuw i32 %[[VAL_11:.+]], 1
+// CHECK-NEXT:      br label %[[OMP_FUSED_HEADER:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_FUSED_EXIT]]:
+// CHECK-NEXT:      br label %[[OMP_FUSED_AFTER:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_FUSED_AFTER]]:
+// CHECK-NEXT:      br label %[[OMP_OMP_LOOP_AFTER7:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_OMP_LOOP_AFTER7]]:
+// CHECK-NEXT:      br label %[[OMP_OMP_LOOP_PREHEADER13:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_OMP_LOOP_PREHEADER13]]:
+// CHECK-NEXT:      br label %[[OMP_OMP_LOOP_HEADER14:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_OMP_LOOP_HEADER14]]:
+// CHECK-NEXT:      %[[VAL_36:.+]] = phi i32 [ 0, %[[VAL_33:.+]] ], [ %[[VAL_37:.+]], %[[VAL_35:.+]] ]
+// CHECK-NEXT:      br label %[[OMP_OMP_LOOP_COND15:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_OMP_LOOP_COND15]]:
+// CHECK-NEXT:      %[[VAL_39:.+]] = icmp ult i32 %[[VAL_36:.+]], %[[VAL_40:.+]]
+// CHECK-NEXT:      br i1 %[[VAL_39:.+]], label %[[OMP_OMP_LOOP_BODY16:.+]], label %[[OMP_OMP_LOOP_EXIT18:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_OMP_LOOP_BODY16]]:
+// CHECK-NEXT:      br label %[[OMP_LOOP_REGION24:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_LOOP_REGION24]]:
+// CHECK-NEXT:      %[[VAL_44:.+]] = getelementptr inbounds float, ptr %[[VAL_23:.+]], i32 %[[VAL_36:.+]]
+// CHECK-NEXT:      store float 6.300000e+01, ptr %[[VAL_44:.+]], align 4
+// CHECK-NEXT:      br label %[[OMP_REGION_CONT23:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_REGION_CONT23]]:
+// CHECK-NEXT:      br label %[[OMP_OMP_LOOP_INC17:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_OMP_LOOP_INC17]]:
+// CHECK-NEXT:      %[[VAL_37:.+]] = add nuw i32 %[[VAL_36:.+]], 1
+// CHECK-NEXT:      br label %[[OMP_OMP_LOOP_HEADER14:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_OMP_LOOP_EXIT18]]:
+// CHECK-NEXT:      br label %[[OMP_OMP_LOOP_AFTER19:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT:    [[OMP_OMP_LOOP_AFTER19]]:
+// CHECK-NEXT:      ret void
+
diff --git a/openmp/runtime/test/transform/fuse/do-looprange.f90 b/openmp/runtime/test/transform/fuse/do-looprange.f90
new file mode 100644
index 0000000000000..8c62b24c4744f
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/do-looprange.f90
@@ -0,0 +1,60 @@
+! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe
+! RUN: %t.exe | FileCheck %s --match-full-lines
+
+program fuse_full ! End-to-end test: FUSE with LOOPRANGE(2,2) applied to four consecutive DO loops; printed lines are matched exactly below.
+  implicit none
+  integer i, j, k, u
+
+  print *, 'do'
+
+  !$OMP FUSE LOOPRANGE(2,2)
+  do i=5, 25, 5 ! 5 iterations; the expected output prints every i before any j or k, i.e. this loop is not fused.
+    print '("i=", I0)', i
+  end do
+  do j=10, 100, 10 ! 10 iterations; the expected output interleaves these with the k loop.
+    print '("j=", I0)', j
+  end do
+  do k=10, 0, -1 ! 11 iterations (one more than j), so the expected output ends the interleaving with k=1 then k=0.
+    print '("k=", I0)', k
+  end do
+  do u=5, 25, 5 ! 5 iterations; the expected output prints these only after j and k have finished.
+    print '("u=", I0)', u
+  end do
+  !$OMP END FUSE
+
+  print *, 'done'
+end program
+
+! CHECK: do
+! CHECK-NEXT: i=5
+! CHECK-NEXT: i=10
+! CHECK-NEXT: i=15
+! CHECK-NEXT: i=20
+! CHECK-NEXT: i=25
+! CHECK-NEXT: j=10
+! CHECK-NEXT: k=10
+! CHECK-NEXT: j=20
+! CHECK-NEXT: k=9
+! CHECK-NEXT: j=30
+! CHECK-NEXT: k=8
+! CHECK-NEXT: j=40
+! CHECK-NEXT: k=7
+! CHECK-NEXT: j=50
+! CHECK-NEXT: k=6
+! CHECK-NEXT: j=60
+! CHECK-NEXT: k=5
+! CHECK-NEXT: j=70
+! CHECK-NEXT: k=4
+! CHECK-NEXT: j=80
+! CHECK-NEXT: k=3
+! CHECK-NEXT: j=90
+! CHECK-NEXT: k=2
+! CHECK-NEXT: j=100
+! CHECK-NEXT: k=1
+! CHECK-NEXT: k=0
+! CHECK-NEXT: u=5
+! CHECK-NEXT: u=10
+! CHECK-NEXT: u=15
+! CHECK-NEXT: u=20
+! CHECK-NEXT: u=25
+! CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/fuse/do.f90 b/openmp/runtime/test/transform/fuse/do.f90
new file mode 100644
index 0000000000000..d4496bce4d723
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/do.f90
@@ -0,0 +1,52 @@
+! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe
+! RUN: %t.exe | FileCheck %s --match-full-lines
+
+program fuse_full ! End-to-end test: FUSE without clauses applied to three DO loops of different lengths; printed lines are matched exactly below.
+  implicit none
+  integer i, j, k
+
+  print *, 'do'
+
+  !$OMP FUSE
+  do i=5, 25, 5 ! 5 iterations; shortest loop, so the expected output stops printing i after i=25.
+    print '("i=", I0)', i
+  end do
+  do j=10, 100, 10 ! 10 iterations; the expected output keeps alternating j and k after i is exhausted.
+    print '("j=", I0)', j
+  end do
+  do k=10, 0, -1 ! 11 iterations; longest loop, so the expected output ends with k=0 on its own.
+    print '("k=", I0)', k
+  end do
+  !$OMP END FUSE
+
+  print *, 'done'
+end program
+
+! CHECK: do
+! CHECK-NEXT: i=5
+! CHECK-NEXT: j=10
+! CHECK-NEXT: k=10
+! CHECK-NEXT: i=10
+! CHECK-NEXT: j=20
+! CHECK-NEXT: k=9
+! CHECK-NEXT: i=15
+! CHECK-NEXT: j=30
+! CHECK-NEXT: k=8
+! CHECK-NEXT: i=20
+! CHECK-NEXT: j=40
+! CHECK-NEXT: k=7
+! CHECK-NEXT: i=25
+! CHECK-NEXT: j=50
+! CHECK-NEXT: k=6
+! CHECK-NEXT: j=60
+! CHECK-NEXT: k=5
+! CHECK-NEXT: j=70
+! CHECK-NEXT: k=4
+! CHECK-NEXT: j=80
+! CHECK-NEXT: k=3
+! CHECK-NEXT: j=90
+! CHECK-NEXT: k=2
+! CHECK-NEXT: j=100
+! CHECK-NEXT: k=1
+! CHECK-NEXT: k=0
+! CHECK-NEXT: done