[Openmp-commits] [clang] [flang] [llvm] [openmp] [Clang][OpenMP][LoopTransformations] Add support for "#pragma omp fuse" loop transformation directive and "looprange" clause (PR #139293)

Roger Ferrer Ibáñez via Openmp-commits openmp-commits at lists.llvm.org
Thu Aug 28 08:06:10 PDT 2025


https://github.com/rofirrim updated https://github.com/llvm/llvm-project/pull/139293

>From c0a9364dde8eb90d10a8971d2f4598b96c05ac76 Mon Sep 17 00:00:00 2001
From: Roger Ferrer Ibanez <roger.ferrer at bsc.es>
Date: Tue, 26 Aug 2025 13:18:19 +0000
Subject: [PATCH 1/3] [Clang][NFC] Rename OMPLoopTransformationDirective to
 OMPCanonicalLoopNestTransformationDirective

Not all loop transformations makes sense to make them
OMPLoopBasedDirective, in particular in OpenMP 6.0 'fuse' (to be
implemented later) is a transformation of a canonical loop sequence.
---
 clang/include/clang/AST/StmtOpenMP.h      | 77 ++++++++++++-----------
 clang/include/clang/Basic/OpenMPKinds.h   |  7 +++
 clang/include/clang/Basic/StmtNodes.td    | 14 +++--
 clang/lib/AST/StmtOpenMP.cpp              | 13 ++--
 clang/lib/AST/StmtProfile.cpp             | 14 ++---
 clang/lib/Basic/OpenMPKinds.cpp           |  8 ++-
 clang/lib/CodeGen/CGStmtOpenMP.cpp        |  3 +-
 clang/lib/Sema/SemaOpenMP.cpp             |  6 +-
 clang/lib/Serialization/ASTReaderStmt.cpp | 14 ++---
 clang/lib/Serialization/ASTWriterStmt.cpp | 14 ++---
 clang/tools/libclang/CIndex.cpp           | 18 +++---
 11 files changed, 106 insertions(+), 82 deletions(-)

diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 2fb33d3036bca..a436676113921 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -889,23 +889,24 @@ class OMPLoopBasedDirective : public OMPExecutableDirective {
 
   /// Calls the specified callback function for all the loops in \p CurStmt,
   /// from the outermost to the innermost.
-  static bool
-  doForAllLoops(Stmt *CurStmt, bool TryImperfectlyNestedLoops,
-                unsigned NumLoops,
-                llvm::function_ref<bool(unsigned, Stmt *)> Callback,
-                llvm::function_ref<void(OMPLoopTransformationDirective *)>
-                    OnTransformationCallback);
+  static bool doForAllLoops(
+      Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops,
+      llvm::function_ref<bool(unsigned, Stmt *)> Callback,
+      llvm::function_ref<void(OMPCanonicalLoopNestTransformationDirective *)>
+          OnTransformationCallback);
   static bool
   doForAllLoops(const Stmt *CurStmt, bool TryImperfectlyNestedLoops,
                 unsigned NumLoops,
                 llvm::function_ref<bool(unsigned, const Stmt *)> Callback,
-                llvm::function_ref<void(const OMPLoopTransformationDirective *)>
+                llvm::function_ref<
+                    void(const OMPCanonicalLoopNestTransformationDirective *)>
                     OnTransformationCallback) {
     auto &&NewCallback = [Callback](unsigned Cnt, Stmt *CurStmt) {
       return Callback(Cnt, CurStmt);
     };
     auto &&NewTransformCb =
-        [OnTransformationCallback](OMPLoopTransformationDirective *A) {
+        [OnTransformationCallback](
+            OMPCanonicalLoopNestTransformationDirective *A) {
           OnTransformationCallback(A);
         };
     return doForAllLoops(const_cast<Stmt *>(CurStmt), TryImperfectlyNestedLoops,
@@ -918,7 +919,7 @@ class OMPLoopBasedDirective : public OMPExecutableDirective {
   doForAllLoops(Stmt *CurStmt, bool TryImperfectlyNestedLoops,
                 unsigned NumLoops,
                 llvm::function_ref<bool(unsigned, Stmt *)> Callback) {
-    auto &&TransformCb = [](OMPLoopTransformationDirective *) {};
+    auto &&TransformCb = [](OMPCanonicalLoopNestTransformationDirective *) {};
     return doForAllLoops(CurStmt, TryImperfectlyNestedLoops, NumLoops, Callback,
                          TransformCb);
   }
@@ -955,19 +956,18 @@ class OMPLoopBasedDirective : public OMPExecutableDirective {
   }
 };
 
-/// The base class for all loop transformation directives.
-class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
+/// The base class for all transformation directives of canonical loop nests.
+class OMPCanonicalLoopNestTransformationDirective
+    : public OMPLoopBasedDirective {
   friend class ASTStmtReader;
 
   /// Number of loops generated by this loop transformation.
   unsigned NumGeneratedLoops = 0;
 
 protected:
-  explicit OMPLoopTransformationDirective(StmtClass SC,
-                                          OpenMPDirectiveKind Kind,
-                                          SourceLocation StartLoc,
-                                          SourceLocation EndLoc,
-                                          unsigned NumAssociatedLoops)
+  explicit OMPCanonicalLoopNestTransformationDirective(
+      StmtClass SC, OpenMPDirectiveKind Kind, SourceLocation StartLoc,
+      SourceLocation EndLoc, unsigned NumAssociatedLoops)
       : OMPLoopBasedDirective(SC, Kind, StartLoc, EndLoc, NumAssociatedLoops) {}
 
   /// Set the number of loops generated by this loop transformation.
@@ -5545,7 +5545,8 @@ class OMPTargetTeamsDistributeSimdDirective final : public OMPLoopDirective {
 };
 
 /// This represents the '#pragma omp tile' loop transformation directive.
-class OMPTileDirective final : public OMPLoopTransformationDirective {
+class OMPTileDirective final
+    : public OMPCanonicalLoopNestTransformationDirective {
   friend class ASTStmtReader;
   friend class OMPExecutableDirective;
 
@@ -5557,9 +5558,9 @@ class OMPTileDirective final : public OMPLoopTransformationDirective {
 
   explicit OMPTileDirective(SourceLocation StartLoc, SourceLocation EndLoc,
                             unsigned NumLoops)
-      : OMPLoopTransformationDirective(OMPTileDirectiveClass,
-                                       llvm::omp::OMPD_tile, StartLoc, EndLoc,
-                                       NumLoops) {
+      : OMPCanonicalLoopNestTransformationDirective(
+            OMPTileDirectiveClass, llvm::omp::OMPD_tile, StartLoc, EndLoc,
+            NumLoops) {
     setNumGeneratedLoops(2 * NumLoops);
   }
 
@@ -5622,7 +5623,8 @@ class OMPTileDirective final : public OMPLoopTransformationDirective {
 };
 
 /// This represents the '#pragma omp stripe' loop transformation directive.
-class OMPStripeDirective final : public OMPLoopTransformationDirective {
+class OMPStripeDirective final
+    : public OMPCanonicalLoopNestTransformationDirective {
   friend class ASTStmtReader;
   friend class OMPExecutableDirective;
 
@@ -5634,9 +5636,9 @@ class OMPStripeDirective final : public OMPLoopTransformationDirective {
 
   explicit OMPStripeDirective(SourceLocation StartLoc, SourceLocation EndLoc,
                               unsigned NumLoops)
-      : OMPLoopTransformationDirective(OMPStripeDirectiveClass,
-                                       llvm::omp::OMPD_stripe, StartLoc, EndLoc,
-                                       NumLoops) {
+      : OMPCanonicalLoopNestTransformationDirective(
+            OMPStripeDirectiveClass, llvm::omp::OMPD_stripe, StartLoc, EndLoc,
+            NumLoops) {
     setNumGeneratedLoops(2 * NumLoops);
   }
 
@@ -5702,7 +5704,8 @@ class OMPStripeDirective final : public OMPLoopTransformationDirective {
 /// #pragma omp unroll
 /// for (int i = 0; i < 64; ++i)
 /// \endcode
-class OMPUnrollDirective final : public OMPLoopTransformationDirective {
+class OMPUnrollDirective final
+    : public OMPCanonicalLoopNestTransformationDirective {
   friend class ASTStmtReader;
   friend class OMPExecutableDirective;
 
@@ -5713,9 +5716,9 @@ class OMPUnrollDirective final : public OMPLoopTransformationDirective {
   };
 
   explicit OMPUnrollDirective(SourceLocation StartLoc, SourceLocation EndLoc)
-      : OMPLoopTransformationDirective(OMPUnrollDirectiveClass,
-                                       llvm::omp::OMPD_unroll, StartLoc, EndLoc,
-                                       1) {}
+      : OMPCanonicalLoopNestTransformationDirective(OMPUnrollDirectiveClass,
+                                                    llvm::omp::OMPD_unroll,
+                                                    StartLoc, EndLoc, 1) {}
 
   /// Set the pre-init statements.
   void setPreInits(Stmt *PreInits) {
@@ -5776,7 +5779,8 @@ class OMPUnrollDirective final : public OMPLoopTransformationDirective {
 /// for (int i = 0; i < n; ++i)
 ///   ...
 /// \endcode
-class OMPReverseDirective final : public OMPLoopTransformationDirective {
+class OMPReverseDirective final
+    : public OMPCanonicalLoopNestTransformationDirective {
   friend class ASTStmtReader;
   friend class OMPExecutableDirective;
 
@@ -5788,9 +5792,9 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective {
 
   explicit OMPReverseDirective(SourceLocation StartLoc, SourceLocation EndLoc,
                                unsigned NumLoops)
-      : OMPLoopTransformationDirective(OMPReverseDirectiveClass,
-                                       llvm::omp::OMPD_reverse, StartLoc,
-                                       EndLoc, NumLoops) {
+      : OMPCanonicalLoopNestTransformationDirective(
+            OMPReverseDirectiveClass, llvm::omp::OMPD_reverse, StartLoc, EndLoc,
+            NumLoops) {
     setNumGeneratedLoops(NumLoops);
   }
 
@@ -5848,7 +5852,8 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective {
 ///     for (int j = 0; j < n; ++j)
 ///       ..
 /// \endcode
-class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
+class OMPInterchangeDirective final
+    : public OMPCanonicalLoopNestTransformationDirective {
   friend class ASTStmtReader;
   friend class OMPExecutableDirective;
 
@@ -5860,9 +5865,9 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
 
   explicit OMPInterchangeDirective(SourceLocation StartLoc,
                                    SourceLocation EndLoc, unsigned NumLoops)
-      : OMPLoopTransformationDirective(OMPInterchangeDirectiveClass,
-                                       llvm::omp::OMPD_interchange, StartLoc,
-                                       EndLoc, NumLoops) {
+      : OMPCanonicalLoopNestTransformationDirective(
+            OMPInterchangeDirectiveClass, llvm::omp::OMPD_interchange, StartLoc,
+            EndLoc, NumLoops) {
     setNumGeneratedLoops(NumLoops);
   }
 
diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h
index f40db4c13c55a..d3285cd9c6a14 100644
--- a/clang/include/clang/Basic/OpenMPKinds.h
+++ b/clang/include/clang/Basic/OpenMPKinds.h
@@ -365,6 +365,13 @@ bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind);
 /// functions
 bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind);
 
+/// Checks if the specified directive is a loop transformation directive that
+/// applies to a canonical loop nest.
+/// \param DKind Specified directive.
+/// \return True iff the directive is a loop transformation.
+bool isOpenMPCanonicalLoopNestTransformationDirective(
+    OpenMPDirectiveKind DKind);
+
 /// Checks if the specified directive is a loop transformation directive.
 /// \param DKind Specified directive.
 /// \return True iff the directive is a loop transformation.
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index c9c173f5c7469..781577549573d 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -227,12 +227,14 @@ def OMPLoopBasedDirective : StmtNode<OMPExecutableDirective, 1>;
 def OMPLoopDirective : StmtNode<OMPLoopBasedDirective, 1>;
 def OMPParallelDirective : StmtNode<OMPExecutableDirective>;
 def OMPSimdDirective : StmtNode<OMPLoopDirective>;
-def OMPLoopTransformationDirective : StmtNode<OMPLoopBasedDirective, 1>;
-def OMPTileDirective : StmtNode<OMPLoopTransformationDirective>;
-def OMPStripeDirective : StmtNode<OMPLoopTransformationDirective>;
-def OMPUnrollDirective : StmtNode<OMPLoopTransformationDirective>;
-def OMPReverseDirective : StmtNode<OMPLoopTransformationDirective>;
-def OMPInterchangeDirective : StmtNode<OMPLoopTransformationDirective>;
+def OMPCanonicalLoopNestTransformationDirective
+    : StmtNode<OMPLoopBasedDirective, 1>;
+def OMPTileDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
+def OMPStripeDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
+def OMPUnrollDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
+def OMPReverseDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
+def OMPInterchangeDirective
+    : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
 def OMPForDirective : StmtNode<OMPLoopDirective>;
 def OMPForSimdDirective : StmtNode<OMPLoopDirective>;
 def OMPSectionsDirective : StmtNode<OMPExecutableDirective>;
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 2eeb5e45ab511..36ecaf6489ef0 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -125,12 +125,13 @@ OMPLoopBasedDirective::tryToFindNextInnerLoop(Stmt *CurStmt,
 bool OMPLoopBasedDirective::doForAllLoops(
     Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops,
     llvm::function_ref<bool(unsigned, Stmt *)> Callback,
-    llvm::function_ref<void(OMPLoopTransformationDirective *)>
+    llvm::function_ref<void(OMPCanonicalLoopNestTransformationDirective *)>
         OnTransformationCallback) {
   CurStmt = CurStmt->IgnoreContainers();
   for (unsigned Cnt = 0; Cnt < NumLoops; ++Cnt) {
     while (true) {
-      auto *Dir = dyn_cast<OMPLoopTransformationDirective>(CurStmt);
+      auto *Dir =
+          dyn_cast<OMPCanonicalLoopNestTransformationDirective>(CurStmt);
       if (!Dir)
         break;
 
@@ -369,11 +370,11 @@ OMPForDirective *OMPForDirective::Create(
   return Dir;
 }
 
-Stmt *OMPLoopTransformationDirective::getTransformedStmt() const {
+Stmt *OMPCanonicalLoopNestTransformationDirective::getTransformedStmt() const {
   switch (getStmtClass()) {
 #define STMT(CLASS, PARENT)
 #define ABSTRACT_STMT(CLASS)
-#define OMPLOOPTRANSFORMATIONDIRECTIVE(CLASS, PARENT)                          \
+#define OMPCANONICALLOOPNESTTRANSFORMATIONDIRECTIVE(CLASS, PARENT)             \
   case Stmt::CLASS##Class:                                                     \
     return static_cast<const CLASS *>(this)->getTransformedStmt();
 #include "clang/AST/StmtNodes.inc"
@@ -382,11 +383,11 @@ Stmt *OMPLoopTransformationDirective::getTransformedStmt() const {
   }
 }
 
-Stmt *OMPLoopTransformationDirective::getPreInits() const {
+Stmt *OMPCanonicalLoopNestTransformationDirective::getPreInits() const {
   switch (getStmtClass()) {
 #define STMT(CLASS, PARENT)
 #define ABSTRACT_STMT(CLASS)
-#define OMPLOOPTRANSFORMATIONDIRECTIVE(CLASS, PARENT)                          \
+#define OMPCANONICALLOOPNESTTRANSFORMATIONDIRECTIVE(CLASS, PARENT)             \
   case Stmt::CLASS##Class:                                                     \
     return static_cast<const CLASS *>(this)->getPreInits();
 #include "clang/AST/StmtNodes.inc"
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 2035fa7635f2a..7a9b7fb431099 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -999,30 +999,30 @@ void StmtProfiler::VisitOMPSimdDirective(const OMPSimdDirective *S) {
   VisitOMPLoopDirective(S);
 }
 
-void StmtProfiler::VisitOMPLoopTransformationDirective(
-    const OMPLoopTransformationDirective *S) {
+void StmtProfiler::VisitOMPCanonicalLoopNestTransformationDirective(
+    const OMPCanonicalLoopNestTransformationDirective *S) {
   VisitOMPLoopBasedDirective(S);
 }
 
 void StmtProfiler::VisitOMPTileDirective(const OMPTileDirective *S) {
-  VisitOMPLoopTransformationDirective(S);
+  VisitOMPCanonicalLoopNestTransformationDirective(S);
 }
 
 void StmtProfiler::VisitOMPStripeDirective(const OMPStripeDirective *S) {
-  VisitOMPLoopTransformationDirective(S);
+  VisitOMPCanonicalLoopNestTransformationDirective(S);
 }
 
 void StmtProfiler::VisitOMPUnrollDirective(const OMPUnrollDirective *S) {
-  VisitOMPLoopTransformationDirective(S);
+  VisitOMPCanonicalLoopNestTransformationDirective(S);
 }
 
 void StmtProfiler::VisitOMPReverseDirective(const OMPReverseDirective *S) {
-  VisitOMPLoopTransformationDirective(S);
+  VisitOMPCanonicalLoopNestTransformationDirective(S);
 }
 
 void StmtProfiler::VisitOMPInterchangeDirective(
     const OMPInterchangeDirective *S) {
-  VisitOMPLoopTransformationDirective(S);
+  VisitOMPCanonicalLoopNestTransformationDirective(S);
 }
 
 void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) {
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index 220b31b0f19bc..3f8f64df8702e 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -717,11 +717,17 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
          Kind == OMPD_teams_loop || Kind == OMPD_target_teams_loop;
 }
 
-bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) {
+bool clang::isOpenMPCanonicalLoopNestTransformationDirective(
+    OpenMPDirectiveKind DKind) {
   return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse ||
          DKind == OMPD_interchange || DKind == OMPD_stripe;
 }
 
+bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) {
+  // FIXME: There will be more cases when we implement 'fuse'.
+  return isOpenMPCanonicalLoopNestTransformationDirective(DKind);
+}
+
 bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) {
   return DKind == OMPD_parallel_for || DKind == OMPD_parallel_for_simd ||
          DKind == OMPD_parallel_master ||
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f6a0ca574a191..6f795b45bc381 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1927,7 +1927,8 @@ static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
     return;
   }
   if (SimplifiedS == NextLoop) {
-    if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
+    if (auto *Dir =
+            dyn_cast<OMPCanonicalLoopNestTransformationDirective>(SimplifiedS))
       SimplifiedS = Dir->getTransformedStmt();
     if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
       SimplifiedS = CanonLoop->getLoopStmt();
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 7d800c446b595..a02850c66b4fe 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4145,7 +4145,8 @@ class DSAAttrChecker final : public StmtVisitor<DSAAttrChecker, void> {
     VisitSubCaptures(S);
   }
 
-  void VisitOMPLoopTransformationDirective(OMPLoopTransformationDirective *S) {
+  void VisitOMPCanonicalLoopNestTransformationDirective(
+      OMPCanonicalLoopNestTransformationDirective *S) {
     // Loop transformation directives do not introduce data sharing
     VisitStmt(S);
   }
@@ -9748,7 +9749,8 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
             }
             return false;
           },
-          [&SemaRef, &Captures](OMPLoopTransformationDirective *Transform) {
+          [&SemaRef,
+           &Captures](OMPCanonicalLoopNestTransformationDirective *Transform) {
             Stmt *DependentPreInits = Transform->getPreInits();
             if (!DependentPreInits)
               return;
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 3f37dfbc3dea9..13618b4a03d1e 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2442,30 +2442,30 @@ void ASTStmtReader::VisitOMPSimdDirective(OMPSimdDirective *D) {
   VisitOMPLoopDirective(D);
 }
 
-void ASTStmtReader::VisitOMPLoopTransformationDirective(
-    OMPLoopTransformationDirective *D) {
+void ASTStmtReader::VisitOMPCanonicalLoopNestTransformationDirective(
+    OMPCanonicalLoopNestTransformationDirective *D) {
   VisitOMPLoopBasedDirective(D);
   D->setNumGeneratedLoops(Record.readUInt32());
 }
 
 void ASTStmtReader::VisitOMPTileDirective(OMPTileDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
 }
 
 void ASTStmtReader::VisitOMPStripeDirective(OMPStripeDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
 }
 
 void ASTStmtReader::VisitOMPUnrollDirective(OMPUnrollDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
 }
 
 void ASTStmtReader::VisitOMPReverseDirective(OMPReverseDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
 }
 
 void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
 }
 
 void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) {
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index be9bad9e96cc1..36b022cc9d371 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2445,34 +2445,34 @@ void ASTStmtWriter::VisitOMPSimdDirective(OMPSimdDirective *D) {
   Code = serialization::STMT_OMP_SIMD_DIRECTIVE;
 }
 
-void ASTStmtWriter::VisitOMPLoopTransformationDirective(
-    OMPLoopTransformationDirective *D) {
+void ASTStmtWriter::VisitOMPCanonicalLoopNestTransformationDirective(
+    OMPCanonicalLoopNestTransformationDirective *D) {
   VisitOMPLoopBasedDirective(D);
   Record.writeUInt32(D->getNumGeneratedLoops());
 }
 
 void ASTStmtWriter::VisitOMPTileDirective(OMPTileDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
   Code = serialization::STMT_OMP_TILE_DIRECTIVE;
 }
 
 void ASTStmtWriter::VisitOMPStripeDirective(OMPStripeDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
   Code = serialization::STMP_OMP_STRIPE_DIRECTIVE;
 }
 
 void ASTStmtWriter::VisitOMPUnrollDirective(OMPUnrollDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
   Code = serialization::STMT_OMP_UNROLL_DIRECTIVE;
 }
 
 void ASTStmtWriter::VisitOMPReverseDirective(OMPReverseDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
   Code = serialization::STMT_OMP_REVERSE_DIRECTIVE;
 }
 
 void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
   Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE;
 }
 
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 858423a06576a..b12b1f07c2f70 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2154,8 +2154,8 @@ class EnqueueVisitor : public ConstStmtVisitor<EnqueueVisitor, void>,
   void VisitOMPLoopDirective(const OMPLoopDirective *D);
   void VisitOMPParallelDirective(const OMPParallelDirective *D);
   void VisitOMPSimdDirective(const OMPSimdDirective *D);
-  void
-  VisitOMPLoopTransformationDirective(const OMPLoopTransformationDirective *D);
+  void VisitOMPCanonicalLoopNestTransformationDirective(
+      const OMPCanonicalLoopNestTransformationDirective *D);
   void VisitOMPTileDirective(const OMPTileDirective *D);
   void VisitOMPStripeDirective(const OMPStripeDirective *D);
   void VisitOMPUnrollDirective(const OMPUnrollDirective *D);
@@ -3301,30 +3301,30 @@ void EnqueueVisitor::VisitOMPSimdDirective(const OMPSimdDirective *D) {
   VisitOMPLoopDirective(D);
 }
 
-void EnqueueVisitor::VisitOMPLoopTransformationDirective(
-    const OMPLoopTransformationDirective *D) {
+void EnqueueVisitor::VisitOMPCanonicalLoopNestTransformationDirective(
+    const OMPCanonicalLoopNestTransformationDirective *D) {
   VisitOMPLoopBasedDirective(D);
 }
 
 void EnqueueVisitor::VisitOMPTileDirective(const OMPTileDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
 }
 
 void EnqueueVisitor::VisitOMPStripeDirective(const OMPStripeDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
 }
 
 void EnqueueVisitor::VisitOMPUnrollDirective(const OMPUnrollDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
 }
 
 void EnqueueVisitor::VisitOMPReverseDirective(const OMPReverseDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
 }
 
 void EnqueueVisitor::VisitOMPInterchangeDirective(
     const OMPInterchangeDirective *D) {
-  VisitOMPLoopTransformationDirective(D);
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
 }
 
 void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) {

>From 280f7435a15900085ccf2731b1d4f26297455f71 Mon Sep 17 00:00:00 2001
From: Roger Ferrer Ibanez <roger.ferrer at bsc.es>
Date: Wed, 27 Aug 2025 08:18:02 +0000
Subject: [PATCH 2/3] [Clang][OpenMP] Add an additional class to hold data that
 will be shared between all loop transformations

This class is not a statement by itself and its goal is to avoid having
to replicate information between classes.

Also simplify the way we handle the "generated loops" information as we
currently only need to know if it is zero or non-zero.
---
 clang/include/clang/AST/StmtOpenMP.h | 48 +++++++++++++++-------------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index a436676113921..602a516c0d43f 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -956,30 +956,42 @@ class OMPLoopBasedDirective : public OMPExecutableDirective {
   }
 };
 
+/// Common class of data shared between
+/// OMPCanonicalLoopNestTransformationDirective and transformations over
+/// canonical loop sequences.
+class OMPLoopTransformationDirective {
+  /// Number of (top-level) generated loops.
+  /// This value is 1 for most transformations as they only map one loop nest
+  /// into another.
+  /// Some loop transformations (like a non-partial 'unroll') may not generate
+  /// a loop nest, so this would be 0.
+  /// Some loop transformations (like 'fuse' with looprange and 'split') may
+  /// generate more than one loop nest, so the value would be >= 1.
+  unsigned NumGeneratedLoops = 1;
+
+protected:
+  void setNumGeneratedLoops(unsigned N) { NumGeneratedLoops = N; }
+
+public:
+  unsigned getNumGeneratedLoops() const { return NumGeneratedLoops; }
+};
+
 /// The base class for all transformation directives of canonical loop nests.
 class OMPCanonicalLoopNestTransformationDirective
-    : public OMPLoopBasedDirective {
+    : public OMPLoopBasedDirective,
+      public OMPLoopTransformationDirective {
   friend class ASTStmtReader;
 
-  /// Number of loops generated by this loop transformation.
-  unsigned NumGeneratedLoops = 0;
-
 protected:
   explicit OMPCanonicalLoopNestTransformationDirective(
       StmtClass SC, OpenMPDirectiveKind Kind, SourceLocation StartLoc,
       SourceLocation EndLoc, unsigned NumAssociatedLoops)
       : OMPLoopBasedDirective(SC, Kind, StartLoc, EndLoc, NumAssociatedLoops) {}
 
-  /// Set the number of loops generated by this loop transformation.
-  void setNumGeneratedLoops(unsigned Num) { NumGeneratedLoops = Num; }
-
 public:
   /// Return the number of associated (consumed) loops.
   unsigned getNumAssociatedLoops() const { return getLoopsNumber(); }
 
-  /// Return the number of loops generated by this loop transformation.
-  unsigned getNumGeneratedLoops() const { return NumGeneratedLoops; }
-
   /// Get the de-sugared statements after the loop transformation.
   ///
   /// Might be nullptr if either the directive generates no loops and is handled
@@ -5560,9 +5572,7 @@ class OMPTileDirective final
                             unsigned NumLoops)
       : OMPCanonicalLoopNestTransformationDirective(
             OMPTileDirectiveClass, llvm::omp::OMPD_tile, StartLoc, EndLoc,
-            NumLoops) {
-    setNumGeneratedLoops(2 * NumLoops);
-  }
+            NumLoops) {}
 
   void setPreInits(Stmt *PreInits) {
     Data->getChildren()[PreInitsOffset] = PreInits;
@@ -5638,9 +5648,7 @@ class OMPStripeDirective final
                               unsigned NumLoops)
       : OMPCanonicalLoopNestTransformationDirective(
             OMPStripeDirectiveClass, llvm::omp::OMPD_stripe, StartLoc, EndLoc,
-            NumLoops) {
-    setNumGeneratedLoops(2 * NumLoops);
-  }
+            NumLoops) {}
 
   void setPreInits(Stmt *PreInits) {
     Data->getChildren()[PreInitsOffset] = PreInits;
@@ -5794,9 +5802,7 @@ class OMPReverseDirective final
                                unsigned NumLoops)
       : OMPCanonicalLoopNestTransformationDirective(
             OMPReverseDirectiveClass, llvm::omp::OMPD_reverse, StartLoc, EndLoc,
-            NumLoops) {
-    setNumGeneratedLoops(NumLoops);
-  }
+            NumLoops) {}
 
   void setPreInits(Stmt *PreInits) {
     Data->getChildren()[PreInitsOffset] = PreInits;
@@ -5867,9 +5873,7 @@ class OMPInterchangeDirective final
                                    SourceLocation EndLoc, unsigned NumLoops)
       : OMPCanonicalLoopNestTransformationDirective(
             OMPInterchangeDirectiveClass, llvm::omp::OMPD_interchange, StartLoc,
-            EndLoc, NumLoops) {
-    setNumGeneratedLoops(NumLoops);
-  }
+            EndLoc, NumLoops) {}
 
   void setPreInits(Stmt *PreInits) {
     Data->getChildren()[PreInitsOffset] = PreInits;

>From 3232f0b50a24fde1cba0317ceb56a43b9340d198 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Wed, 27 Aug 2025 13:22:51 +0000
Subject: [PATCH 3/3] [Clang][OpenMP] Implement #pragma omp fuse

---
 clang/docs/OpenMPSupport.rst                  |    2 +
 clang/docs/ReleaseNotes.rst                   |    1 +
 clang/include/clang-c/Index.h                 |    4 +
 clang/include/clang/AST/OpenMPClause.h        |   74 +
 clang/include/clang/AST/RecursiveASTVisitor.h |   11 +
 clang/include/clang/AST/StmtOpenMP.h          |  211 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |   14 +
 clang/include/clang/Basic/OpenMPKinds.h       |    7 +
 clang/include/clang/Basic/StmtNodes.td        |    4 +
 clang/include/clang/Parse/Parser.h            |    3 +
 clang/include/clang/Sema/SemaOpenMP.h         |   91 +-
 .../include/clang/Serialization/ASTBitCodes.h |    1 +
 clang/lib/AST/OpenMPClause.cpp                |   35 +
 clang/lib/AST/StmtOpenMP.cpp                  |   79 +-
 clang/lib/AST/StmtPrinter.cpp                 |    5 +
 clang/lib/AST/StmtProfile.cpp                 |   16 +
 clang/lib/Basic/OpenMPKinds.cpp               |   11 +-
 clang/lib/CodeGen/CGStmt.cpp                  |    3 +
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |   42 +-
 clang/lib/CodeGen/CodeGenFunction.h           |    1 +
 clang/lib/Parse/ParseOpenMP.cpp               |   36 +
 clang/lib/Sema/SemaExceptionSpec.cpp          |    1 +
 clang/lib/Sema/SemaOpenMP.cpp                 |  850 +++++-
 clang/lib/Sema/TreeTransform.h                |   44 +
 clang/lib/Serialization/ASTReader.cpp         |   11 +
 clang/lib/Serialization/ASTReaderStmt.cpp     |   20 +
 clang/lib/Serialization/ASTWriter.cpp         |    8 +
 clang/lib/Serialization/ASTWriterStmt.cpp     |   13 +
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  |    1 +
 clang/test/OpenMP/fuse_ast_print.cpp          |  400 +++
 clang/test/OpenMP/fuse_codegen.cpp            | 2328 +++++++++++++++++
 clang/test/OpenMP/fuse_messages.cpp           |  210 ++
 clang/tools/libclang/CIndex.cpp               |   19 +
 clang/tools/libclang/CXCursor.cpp             |    3 +
 flang/include/flang/Lower/OpenMP/Clauses.h    |    1 +
 flang/include/flang/Parser/dump-parse-tree.h  |    1 +
 flang/include/flang/Parser/parse-tree.h       |    9 +
 flang/lib/Lower/OpenMP/Clauses.cpp            |    5 +
 flang/lib/Parser/openmp-parsers.cpp           |    5 +
 flang/lib/Parser/unparse.cpp                  |    7 +
 flang/lib/Semantics/check-omp-structure.cpp   |    6 +
 llvm/include/llvm/Frontend/OpenMP/ClauseT.h   |   13 +-
 llvm/include/llvm/Frontend/OpenMP/OMP.td      |    9 +
 .../runtime/test/transform/fuse/foreach.cpp   |  191 ++
 openmp/runtime/test/transform/fuse/intfor.c   |   50 +
 .../runtime/test/transform/fuse/iterfor.cpp   |  194 ++
 .../fuse/parallel-wsloop-collapse-foreach.cpp |  207 ++
 .../fuse/parallel-wsloop-collapse-intfor.c    |   45 +
 48 files changed, 5248 insertions(+), 54 deletions(-)
 create mode 100644 clang/test/OpenMP/fuse_ast_print.cpp
 create mode 100644 clang/test/OpenMP/fuse_codegen.cpp
 create mode 100644 clang/test/OpenMP/fuse_messages.cpp
 create mode 100644 openmp/runtime/test/transform/fuse/foreach.cpp
 create mode 100644 openmp/runtime/test/transform/fuse/intfor.c
 create mode 100644 openmp/runtime/test/transform/fuse/iterfor.cpp
 create mode 100644 openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp
 create mode 100644 openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c

diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index 2b510747bea90..35c0a2047cfae 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -376,6 +376,8 @@ implementation.
 +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
 | loop stripe transformation                                  | :good:`done`              | https://github.com/llvm/llvm-project/pull/119891                                                     |
 +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| loop fuse transformation                                    | :good:`done`              | :none:`unclaimed`         |                                                                          |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
 | workdistribute construct                                    |                           | :none:`in progress`       | @skc7, @mjklemm                                                          |
 +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
 | task_iteration                                              | :none:`unclaimed`         | :none:`unclaimed`         |                                                                          |
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 67178de6efcff..a062435ce461b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -445,6 +445,7 @@ OpenMP Support
   modifier in the ``adjust_args`` clause.
 - Allow array length to be omitted in array section subscript expression.
 - Fixed non-contiguous strided update in the ``omp target update`` directive with the ``from`` clause.
+- Added support for 'omp fuse' directive.
 
 Improvements
 ^^^^^^^^^^^^
diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
index be038d9165fc6..f13d9c9307b40 100644
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -2162,6 +2162,10 @@ enum CXCursorKind {
    */
   CXCursor_OMPStripeDirective = 310,
 
+  /** OpenMP fuse directive
+   */
+  CXCursor_OMPFuseDirective = 311,
+
   /** OpenACC Compute Construct.
    */
   CXCursor_OpenACCComputeConstruct = 320,
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index 1118d3e062e68..9607c94da1cea 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -1149,6 +1149,80 @@ class OMPFullClause final : public OMPNoChildClause<llvm::omp::OMPC_full> {
   static OMPFullClause *CreateEmpty(const ASTContext &C);
 };
 
+/// This class represents the 'looprange' clause in the
+/// '#pragma omp fuse' directive
+///
+/// \code {c}
+/// #pragma omp fuse looprange(1,2)
+/// {
+///   for(int i = 0; i < 64; ++i)
+///   for(int j = 0; j < 256; j+=2)
+///   for(int k = 127; k >= 0; --k)
+/// \endcode
+class OMPLoopRangeClause final : public OMPClause {
+  friend class OMPClauseReader;
+  /// Location of '('
+  SourceLocation LParenLoc;
+
+  /// Location of first and count expressions
+  SourceLocation FirstLoc, CountLoc;
+
+  /// Number of looprange arguments (always 2: first, count)
+  static constexpr unsigned NumArgs = 2;
+  Stmt *Args[NumArgs] = {nullptr, nullptr};
+
+  /// Set looprange 'first' expression
+  void setFirst(Expr *E) { Args[0] = E; }
+
+  /// Set looprange 'count' expression
+  void setCount(Expr *E) { Args[1] = E; }
+
+  /// Build an empty clause for deserialization.
+  explicit OMPLoopRangeClause()
+      : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {}
+
+public:
+  /// Build a 'looprange' clause AST node.
+  static OMPLoopRangeClause *
+  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
+         SourceLocation FirstLoc, SourceLocation CountLoc,
+         SourceLocation EndLoc, Expr *First, Expr *Count);
+
+  /// Build an empty 'looprange' clause node.
+  static OMPLoopRangeClause *CreateEmpty(const ASTContext &C);
+
+  // Location getters/setters
+  SourceLocation getLParenLoc() const { return LParenLoc; }
+  SourceLocation getFirstLoc() const { return FirstLoc; }
+  SourceLocation getCountLoc() const { return CountLoc; }
+
+  void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+  void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; }
+  void setCountLoc(SourceLocation Loc) { CountLoc = Loc; }
+
+  /// Get looprange 'first' expression
+  Expr *getFirst() const { return cast_or_null<Expr>(Args[0]); }
+
+  /// Get looprange 'count' expression
+  Expr *getCount() const { return cast_or_null<Expr>(Args[1]); }
+
+  child_range children() { return child_range(Args, Args + NumArgs); }
+  const_child_range children() const {
+    return const_child_range(Args, Args + NumArgs);
+  }
+
+  child_range used_children() {
+    return child_range(child_iterator(), child_iterator());
+  }
+  const_child_range used_children() const {
+    return const_child_range(const_child_iterator(), const_child_iterator());
+  }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == llvm::omp::OMPC_looprange;
+  }
+};
+
 /// Representation of the 'partial' clause of the '#pragma omp unroll'
 /// directive.
 ///
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 02581c8e73299..89de8f767f579 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3186,6 +3186,9 @@ DEF_TRAVERSE_STMT(OMPUnrollDirective,
 DEF_TRAVERSE_STMT(OMPReverseDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
+DEF_TRAVERSE_STMT(OMPFuseDirective,
+                  { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
 DEF_TRAVERSE_STMT(OMPInterchangeDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
@@ -3503,6 +3506,14 @@ bool RecursiveASTVisitor<Derived>::VisitOMPFullClause(OMPFullClause *C) {
   return true;
 }
 
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPLoopRangeClause(
+    OMPLoopRangeClause *C) {
+  TRY_TO(TraverseStmt(C->getFirst()));
+  TRY_TO(TraverseStmt(C->getCount()));
+  return true;
+}
+
 template <typename Derived>
 bool RecursiveASTVisitor<Derived>::VisitOMPPartialClause(OMPPartialClause *C) {
   TRY_TO(TraverseStmt(C->getFactor()));
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 602a516c0d43f..65b47b6d98ef0 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -21,6 +21,7 @@
 #include "clang/AST/StmtCXX.h"
 #include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/SourceLocation.h"
+#include "llvm/Support/Casting.h"
 
 namespace clang {
 
@@ -677,6 +678,10 @@ class OMPParallelDirective : public OMPExecutableDirective {
   }
 };
 
+// Forward declaration of a generic loop transformation. Used in the declaration
+// of OMPLoopBasedDirective.
+class OMPLoopTransformationDirective;
+
 /// The base class for all loop-based directives, including loop transformation
 /// directives.
 class OMPLoopBasedDirective : public OMPExecutableDirective {
@@ -889,24 +894,23 @@ class OMPLoopBasedDirective : public OMPExecutableDirective {
 
   /// Calls the specified callback function for all the loops in \p CurStmt,
   /// from the outermost to the innermost.
-  static bool doForAllLoops(
-      Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops,
-      llvm::function_ref<bool(unsigned, Stmt *)> Callback,
-      llvm::function_ref<void(OMPCanonicalLoopNestTransformationDirective *)>
-          OnTransformationCallback);
+  static bool
+  doForAllLoops(Stmt *CurStmt, bool TryImperfectlyNestedLoops,
+                unsigned NumLoops,
+                llvm::function_ref<bool(unsigned, Stmt *)> Callback,
+                llvm::function_ref<void(OMPLoopTransformationDirective *)>
+                    OnTransformationCallback);
   static bool
   doForAllLoops(const Stmt *CurStmt, bool TryImperfectlyNestedLoops,
                 unsigned NumLoops,
                 llvm::function_ref<bool(unsigned, const Stmt *)> Callback,
-                llvm::function_ref<
-                    void(const OMPCanonicalLoopNestTransformationDirective *)>
+                llvm::function_ref<void(const OMPLoopTransformationDirective *)>
                     OnTransformationCallback) {
     auto &&NewCallback = [Callback](unsigned Cnt, Stmt *CurStmt) {
       return Callback(Cnt, CurStmt);
     };
     auto &&NewTransformCb =
-        [OnTransformationCallback](
-            OMPCanonicalLoopNestTransformationDirective *A) {
+        [OnTransformationCallback](OMPLoopTransformationDirective *A) {
           OnTransformationCallback(A);
         };
     return doForAllLoops(const_cast<Stmt *>(CurStmt), TryImperfectlyNestedLoops,
@@ -919,7 +923,7 @@ class OMPLoopBasedDirective : public OMPExecutableDirective {
   doForAllLoops(Stmt *CurStmt, bool TryImperfectlyNestedLoops,
                 unsigned NumLoops,
                 llvm::function_ref<bool(unsigned, Stmt *)> Callback) {
-    auto &&TransformCb = [](OMPCanonicalLoopNestTransformationDirective *) {};
+    auto &&TransformCb = [](OMPLoopTransformationDirective *) {};
     return doForAllLoops(CurStmt, TryImperfectlyNestedLoops, NumLoops, Callback,
                          TransformCb);
   }
@@ -957,9 +961,11 @@ class OMPLoopBasedDirective : public OMPExecutableDirective {
 };
 
 /// Common class of data shared between
-/// OMPCanonicalLoopNestTransformationDirective and transformations over
-/// canonical loop sequences.
+/// OMPCanonicalLoopNestTransformationDirective and
+/// OMPCanonicalLoopSequenceTransformationDirective
 class OMPLoopTransformationDirective {
+  friend class ASTStmtReader;
+
   /// Number of (top-level) generated loops.
   /// This value is 1 for most transformations as they only map one loop nest
   /// into another.
@@ -969,11 +975,35 @@ class OMPLoopTransformationDirective {
   /// generate more than one loop nest, so the value would be >= 1.
   unsigned NumGeneratedLoops = 1;
 
+  /// We need this because we cannot easily make OMPLoopTransformationDirective
+  /// a proper Stmt.
+  Stmt *S;
+
 protected:
   void setNumGeneratedLoops(unsigned N) { NumGeneratedLoops = N; }
 
+  explicit OMPLoopTransformationDirective(Stmt *S) : S(S) {}
+
 public:
   unsigned getNumGeneratedLoops() const { return NumGeneratedLoops; }
+
+  /// Returns the specific directive related to this loop transformation.
+  Stmt *getDirective() const { return S; }
+
+  /// Get the de-sugared statements after the loop transformation.
+  ///
+  /// Might be nullptr if either the directive generates no loops and is handled
+  /// directly in CodeGen, or resolving a template-dependence context is
+  /// required.
+  Stmt *getTransformedStmt() const;
+
+  /// Return preinits statement.
+  Stmt *getPreInits() const;
+
+  static bool classof(const Stmt *T) {
+    return isa<OMPCanonicalLoopNestTransformationDirective>(T) ||
+           isa<OMPCanonicalLoopSequenceTransformationDirective>(T);
+  }
 };
 
 /// The base class for all transformation directives of canonical loop nests.
@@ -986,7 +1016,8 @@ class OMPCanonicalLoopNestTransformationDirective
   explicit OMPCanonicalLoopNestTransformationDirective(
       StmtClass SC, OpenMPDirectiveKind Kind, SourceLocation StartLoc,
       SourceLocation EndLoc, unsigned NumAssociatedLoops)
-      : OMPLoopBasedDirective(SC, Kind, StartLoc, EndLoc, NumAssociatedLoops) {}
+      : OMPLoopBasedDirective(SC, Kind, StartLoc, EndLoc, NumAssociatedLoops),
+        OMPLoopTransformationDirective(this) {}
 
 public:
   /// Return the number of associated (consumed) loops.
@@ -5923,6 +5954,124 @@ class OMPInterchangeDirective final
   }
 };
 
+/// The base class for all transformation directives of canonical loop
+/// sequences (currently only 'fuse')
+class OMPCanonicalLoopSequenceTransformationDirective
+    : public OMPExecutableDirective,
+      public OMPLoopTransformationDirective {
+  friend class ASTStmtReader;
+
+  /// Number of loops contained in the canonical loop sequence.
+  unsigned NumLoopsInSequence = 1;
+
+protected:
+  explicit OMPCanonicalLoopSequenceTransformationDirective(
+      StmtClass SC, OpenMPDirectiveKind Kind, SourceLocation StartLoc,
+      SourceLocation EndLoc, unsigned NumLoopsInSequence)
+      : OMPExecutableDirective(SC, Kind, StartLoc, EndLoc),
+        OMPLoopTransformationDirective(this),
+        NumLoopsInSequence(NumLoopsInSequence) {}
+
+public:
+  /// Returns the number of canonical loop nests contained in this sequence.
+  unsigned getNumLoopsInSequence() const { return NumLoopsInSequence; }
+
+  /// Get the de-sugared statements after the loop transformation.
+  ///
+  /// Might be nullptr if either the directive generates no loops and is handled
+  /// directly in CodeGen, or resolving a template-dependence context is
+  /// required.
+  Stmt *getTransformedStmt() const;
+
+  /// Return preinits statement.
+  Stmt *getPreInits() const;
+
+  static bool classof(const Stmt *T) {
+    Stmt::StmtClass C = T->getStmtClass();
+    return C == OMPFuseDirectiveClass;
+  }
+};
+
+/// Represents the '#pragma omp fuse' loop transformation directive
+///
+/// \code{c}
+/// #pragma omp fuse
+/// {
+///   for(int i = 0; i < m1; ++i) {...}
+///   for(int j = 0; j < m2; ++j) {...}
+///   ...
+/// }
+/// \endcode
+class OMPFuseDirective final
+    : public OMPCanonicalLoopSequenceTransformationDirective {
+  friend class ASTStmtReader;
+  friend class OMPExecutableDirective;
+
+  // Offsets of child members.
+  enum {
+    PreInitsOffset = 0,
+    TransformedStmtOffset,
+  };
+
+  unsigned NumGeneratedLoopNests = 1;
+
+  explicit OMPFuseDirective(SourceLocation StartLoc, SourceLocation EndLoc,
+                            unsigned NumLoopsInSequence)
+      : OMPCanonicalLoopSequenceTransformationDirective(
+            OMPFuseDirectiveClass, llvm::omp::OMPD_fuse, StartLoc, EndLoc,
+            NumLoopsInSequence) {}
+
+  void setPreInits(Stmt *PreInits) {
+    Data->getChildren()[PreInitsOffset] = PreInits;
+  }
+
+  void setTransformedStmt(Stmt *S) {
+    Data->getChildren()[TransformedStmtOffset] = S;
+  }
+
+public:
+  /// Create a new AST node representation for #pragma omp fuse'
+  ///
+  /// \param C Context of the AST
+  /// \param StartLoc Location of the introducer (e.g the 'omp' token)
+  /// \param EndLoc Location of the directive's end (e.g the tok::eod)
+  /// \param Clauses The directive's clauses
+  /// \param NumLoops Total number of loops.
+  /// \param NumLoopsFused Number of affected top level canonical loops
+  ///                      (may be different to NumLoops due to looprange)
+  /// \param AssociatedStmt The outermost associated loop
+  /// \param TransformedStmt The loop nest after fusion, or nullptr in
+  ///                        dependent
+  /// \param PreInits Helper preinits statements for the loop nest
+  static OMPFuseDirective *Create(const ASTContext &C, SourceLocation StartLoc,
+                                  SourceLocation EndLoc,
+                                  ArrayRef<OMPClause *> Clauses,
+                                  unsigned NumLoops, unsigned NumLoopsFused,
+                                  Stmt *AssociatedStmt, Stmt *TransformedStmt,
+                                  Stmt *PreInits);
+
+  /// Build an empty '#pragma omp fuse' AST node for deserialization
+  ///
+  /// \param C Context of the AST
+  /// \param NumClauses Number of clauses to allocate
+  /// \param NumLoopNests Number of top level loops to allocate
+  static OMPFuseDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses,
+                                       unsigned NumLoops);
+
+  /// Gets the associated loops after the transformation. This is the de-sugared
+  /// replacement or nulltpr in dependent contexts.
+  Stmt *getTransformedStmt() const {
+    return Data->getChildren()[TransformedStmtOffset];
+  }
+
+  /// Return preinits statement.
+  Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == OMPFuseDirectiveClass;
+  }
+};
+
 /// This represents '#pragma omp scan' directive.
 ///
 /// \code
@@ -6591,4 +6740,40 @@ class OMPAssumeDirective final : public OMPExecutableDirective {
 
 } // end namespace clang
 
+namespace llvm {
+// Allow a Stmt* be casted correctly to an OMPLoopTransformationDirective*.
+// The default routines would just use a C-style cast which won't work well
+// for the multiple inheritance here. We have to use a static cast from the
+// corresponding subclass.
+template <>
+struct CastInfo<clang::OMPLoopTransformationDirective, clang::Stmt *>
+    : public NullableValueCastFailed<clang::OMPLoopTransformationDirective *>,
+      public DefaultDoCastIfPossible<
+          clang::OMPLoopTransformationDirective *, clang::Stmt *,
+          CastInfo<clang::OMPLoopTransformationDirective, clang::Stmt *>> {
+  static bool isPossible(const clang::Stmt *T) {
+    return clang::OMPLoopTransformationDirective::classof(T);
+  }
+
+  static clang::OMPLoopTransformationDirective *doCast(clang::Stmt *T) {
+    if (auto *D =
+            dyn_cast<clang::OMPCanonicalLoopNestTransformationDirective>(T)) {
+      return static_cast<clang::OMPLoopTransformationDirective *>(D);
+    }
+    if (auto *D =
+            dyn_cast<clang::OMPCanonicalLoopSequenceTransformationDirective>(
+                T)) {
+      return static_cast<clang::OMPLoopTransformationDirective *>(D);
+    }
+    llvm_unreachable("unexpected type");
+  }
+};
+template <>
+struct CastInfo<clang::OMPLoopTransformationDirective, const clang::Stmt *>
+    : public ConstStrippingForwardingCast<
+          clang::OMPLoopTransformationDirective, const clang::Stmt *,
+          CastInfo<clang::OMPLoopTransformationDirective, clang::Stmt *>> {};
+
+} // namespace llvm
+
 #endif
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 4bee813edf645..3eaa1be260513 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11724,6 +11724,20 @@ def note_omp_implicit_dsa : Note<
   "implicitly determined as %0">;
 def err_omp_loop_var_dsa : Error<
   "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">;
+def err_omp_not_canonical_loop
+    : Error<"loop after '#pragma omp %0' is not in canonical form">;
+def err_omp_not_a_loop_sequence
+    : Error<"statement after '#pragma omp %0' must be a loop sequence "
+            "containing canonical loops or loop-generating constructs">;
+def err_omp_empty_loop_sequence
+    : Error<"loop sequence after '#pragma omp %0' must contain at least 1 "
+            "canonical loop or loop-generating construct">;
+def err_omp_invalid_looprange
+    : Error<"looprange clause selects loops from %1 to %2 but this exceeds the "
+            "number of loops (%3) in the loop sequence">;
+def warn_omp_redundant_fusion : Warning<"looprange clause selects a single "
+                                        "loop, resulting in redundant fusion">,
+                                InGroup<OpenMPClauses>;
 def err_omp_not_for : Error<
   "%select{statement after '#pragma omp %1' must be a for loop|"
   "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">;
diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h
index d3285cd9c6a14..59b9c18052636 100644
--- a/clang/include/clang/Basic/OpenMPKinds.h
+++ b/clang/include/clang/Basic/OpenMPKinds.h
@@ -372,6 +372,13 @@ bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind);
 bool isOpenMPCanonicalLoopNestTransformationDirective(
     OpenMPDirectiveKind DKind);
 
+/// Checks if the specified directive is a loop transformation directive that
+/// applies to a canonical loop sequence.
+/// \param DKind Specified directive.
+/// \return True iff the directive is a loop transformation.
+bool isOpenMPCanonicalLoopSequenceTransformationDirective(
+    OpenMPDirectiveKind DKind);
+
 /// Checks if the specified directive is a loop transformation directive.
 /// \param DKind Specified directive.
 /// \return True iff the directive is a loop transformation.
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index 781577549573d..9135acb7850d6 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -235,6 +235,10 @@ def OMPUnrollDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
 def OMPReverseDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
 def OMPInterchangeDirective
     : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
+def OMPCanonicalLoopSequenceTransformationDirective
+    : StmtNode<OMPExecutableDirective, 1>;
+def OMPFuseDirective
+    : StmtNode<OMPCanonicalLoopSequenceTransformationDirective>;
 def OMPForDirective : StmtNode<OMPLoopDirective>;
 def OMPForSimdDirective : StmtNode<OMPLoopDirective>;
 def OMPSectionsDirective : StmtNode<OMPExecutableDirective>;
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index e9437e6d46366..f9a61bd018e7c 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -6759,6 +6759,9 @@ class Parser : public CodeCompletionHandler {
                                                 OpenMPClauseKind Kind,
                                                 bool ParseOnly);
 
+  /// Parses the 'looprange' clause of a '#pragma omp fuse' directive.
+  OMPClause *ParseOpenMPLoopRangeClause();
+
   /// Parses the 'sizes' clause of a '#pragma omp tile' directive.
   OMPClause *ParseOpenMPSizesClause();
 
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index 91c3d4bd5210e..93fc0ee16228f 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -457,6 +457,13 @@ class SemaOpenMP : public SemaBase {
                                              Stmt *AStmt,
                                              SourceLocation StartLoc,
                                              SourceLocation EndLoc);
+
+  /// Called on well-formed '#pragma omp fuse' after parsing of its
+  /// clauses and the associated statement.
+  StmtResult ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
+                                      Stmt *AStmt, SourceLocation StartLoc,
+                                      SourceLocation EndLoc);
+
   /// Called on well-formed '\#pragma omp for' after parsing
   /// of the associated statement.
   StmtResult
@@ -915,6 +922,12 @@ class SemaOpenMP : public SemaBase {
                                        SourceLocation StartLoc,
                                        SourceLocation LParenLoc,
                                        SourceLocation EndLoc);
+
+  /// Called on well-form 'looprange' clause after parsing its arguments.
+  OMPClause *
+  ActOnOpenMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc,
+                             SourceLocation LParenLoc, SourceLocation FirstLoc,
+                             SourceLocation CountLoc, SourceLocation EndLoc);
   /// Called on well-formed 'ordered' clause.
   OMPClause *
   ActOnOpenMPOrderedClause(SourceLocation StartLoc, SourceLocation EndLoc,
@@ -1479,7 +1492,83 @@ class SemaOpenMP : public SemaBase {
   bool checkTransformableLoopNest(
       OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
       SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
-      Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits);
+      Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits);
+
+  /// Categories of loops encountered during semantic OpenMP loop
+  /// analysis.
+  ///
+  /// This enumeration identifies the structural category of a loop or sequence
+  /// of loops analyzed in the context of OpenMP transformations and directives.
+  /// This categorization helps differentiate between original source loops
+  /// and the structures resulting from applying OpenMP loop transformations.
+  enum class OMPLoopCategory {
+    /// @var OMPLoopCategory::RegularLoop
+    /// Represents a standard canonical loop nest found in the
+    /// original source code or an intact loop after transformations
+    /// (i.e Post/Pre loops of a loopranged fusion).
+    RegularLoop,
+
+    /// @var OMPLoopCategory::TransformSingleLoop
+    /// Represents the resulting loop structure when an OpenMP loop
+    /// transformation, generates a single, top-level loop.
+    TransformSingleLoop,
+  };
+
+  /// Holds the result of the analysis of a (possibly canonical) loop.
+  struct LoopAnalysis {
+    /// Category of the analyzed loop.
+    OMPLoopCategory Category;
+    /// Loop analyses results.
+    OMPLoopBasedDirective::HelperExprs HelperExprs;
+    /// The for-statement of the loop.
+    Stmt *ForStmt;
+    /// Initialization statements before transformations.
+    SmallVector<Stmt *> OriginalInits;
+    /// Initialization statements required after transformation of this loop.
+    SmallVector<Stmt *> TransformsPreInits;
+
+    explicit LoopAnalysis(OMPLoopCategory Category) : Category(Category) {}
+  };
+
+  /// Holds the result of the analysis of a (possibly canonical) loop sequence.
+  struct LoopSequenceAnalysis {
+    /// Number of top level canonical loops.
+    unsigned LoopSeqSize = 0;
+    /// For each loop results of the analysis.
+    std::vector<LoopAnalysis> Loops;
+    /// Additional code required before entering the transformed loop sequence.
+    SmallVector<Stmt *> LoopSequencePreInits;
+  };
+
+  /// The main recursive process of `checkTransformableLoopSequence` that
+  /// performs grammatical parsing of a canonical loop sequence. It extracts
+  /// key information, such as the number of top-level loops, loop statements,
+  /// helper expressions, and other relevant loop-related data, all in a single
+  /// execution to avoid redundant traversals. This analysis flattens inner
+  /// Loop Sequences
+  ///
+  /// \param LoopSeqStmt    The AST of the original statement.
+  /// \param SeqAnalysis    [out] Result of the analysis of \p LoopSeqStmt
+  /// \param Context
+  /// \param Kind           The loop transformation directive kind.
+  /// \return Whether the original statement is both syntactically and
+  /// semantically correct according to OpenMP 6.0 canonical loop
+  /// sequence definition.
+  bool analyzeLoopSequence(Stmt *LoopSeqStmt, LoopSequenceAnalysis &SeqAnalysis,
+                           ASTContext &Context, OpenMPDirectiveKind Kind);
+
+  /// Validates and checks whether a loop sequence can be transformed according
+  /// to the given directive, providing necessary setup and initialization
+  /// (Driver function) before recursion using `analyzeLoopSequence`.
+  ///
+  /// \param Kind           The loop transformation directive kind.
+  /// \param AStmt          The AST of the original statement
+  /// \param SeqAnalysis    [out] Result of the analysis of \p LoopSeqStmt
+  /// \param Context
+  /// \return Whether there was an absence of errors or not
+  bool checkTransformableLoopSequence(OpenMPDirectiveKind Kind, Stmt *AStmt,
+                                      LoopSequenceAnalysis &SeqAnalysis,
+                                      ASTContext &Context);
 
   /// Helper to keep information about the current `omp begin/end declare
   /// variant` nesting.
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 441047d64f48c..99864c7373908 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1951,6 +1951,7 @@ enum StmtCode {
   STMT_OMP_UNROLL_DIRECTIVE,
   STMT_OMP_REVERSE_DIRECTIVE,
   STMT_OMP_INTERCHANGE_DIRECTIVE,
+  STMT_OMP_FUSE_DIRECTIVE,
   STMT_OMP_FOR_DIRECTIVE,
   STMT_OMP_FOR_SIMD_DIRECTIVE,
   STMT_OMP_SECTIONS_DIRECTIVE,
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 588b0dcc6d7b8..2ede492618871 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -1020,6 +1020,26 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) {
   return new (C) OMPPartialClause();
 }
 
+OMPLoopRangeClause *
+OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc,
+                           SourceLocation LParenLoc, SourceLocation FirstLoc,
+                           SourceLocation CountLoc, SourceLocation EndLoc,
+                           Expr *First, Expr *Count) {
+  OMPLoopRangeClause *Clause = CreateEmpty(C);
+  Clause->setLocStart(StartLoc);
+  Clause->setLParenLoc(LParenLoc);
+  Clause->setFirstLoc(FirstLoc);
+  Clause->setCountLoc(CountLoc);
+  Clause->setLocEnd(EndLoc);
+  Clause->setFirst(First);
+  Clause->setCount(Count);
+  return Clause;
+}
+
+OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) {
+  return new (C) OMPLoopRangeClause();
+}
+
 OMPAllocateClause *OMPAllocateClause::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
     Expr *Allocator, Expr *Alignment, SourceLocation ColonLoc,
@@ -1889,6 +1909,21 @@ void OMPClausePrinter::VisitOMPPartialClause(OMPPartialClause *Node) {
   }
 }
 
+void OMPClausePrinter::VisitOMPLoopRangeClause(OMPLoopRangeClause *Node) {
+  OS << "looprange";
+
+  Expr *First = Node->getFirst();
+  Expr *Count = Node->getCount();
+
+  if (First && Count) {
+    OS << "(";
+    First->printPretty(OS, nullptr, Policy, 0);
+    OS << ",";
+    Count->printPretty(OS, nullptr, Policy, 0);
+    OS << ")";
+  }
+}
+
 void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) {
   OS << "allocator(";
   Node->getAllocator()->printPretty(OS, nullptr, Policy, 0);
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 36ecaf6489ef0..fec50634005b2 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -125,13 +125,12 @@ OMPLoopBasedDirective::tryToFindNextInnerLoop(Stmt *CurStmt,
 bool OMPLoopBasedDirective::doForAllLoops(
     Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops,
     llvm::function_ref<bool(unsigned, Stmt *)> Callback,
-    llvm::function_ref<void(OMPCanonicalLoopNestTransformationDirective *)>
+    llvm::function_ref<void(OMPLoopTransformationDirective *)>
         OnTransformationCallback) {
   CurStmt = CurStmt->IgnoreContainers();
   for (unsigned Cnt = 0; Cnt < NumLoops; ++Cnt) {
     while (true) {
-      auto *Dir =
-          dyn_cast<OMPCanonicalLoopNestTransformationDirective>(CurStmt);
+      auto *Dir = dyn_cast<OMPLoopTransformationDirective>(CurStmt);
       if (!Dir)
         break;
 
@@ -370,6 +369,26 @@ OMPForDirective *OMPForDirective::Create(
   return Dir;
 }
 
+Stmt *OMPLoopTransformationDirective::getTransformedStmt() const {
+  if (auto *D = dyn_cast<OMPCanonicalLoopNestTransformationDirective>(S)) {
+    return D->getTransformedStmt();
+  }
+  if (auto *D = dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(S)) {
+    return D->getTransformedStmt();
+  }
+  llvm_unreachable("unexpected object type");
+}
+
+Stmt *OMPLoopTransformationDirective::getPreInits() const {
+  if (auto *D = dyn_cast<OMPCanonicalLoopNestTransformationDirective>(S)) {
+    return D->getPreInits();
+  }
+  if (auto *D = dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(S)) {
+    return D->getPreInits();
+  }
+  llvm_unreachable("unexpected object type");
+}
+
 Stmt *OMPCanonicalLoopNestTransformationDirective::getTransformedStmt() const {
   switch (getStmtClass()) {
 #define STMT(CLASS, PARENT)
@@ -379,7 +398,7 @@ Stmt *OMPCanonicalLoopNestTransformationDirective::getTransformedStmt() const {
     return static_cast<const CLASS *>(this)->getTransformedStmt();
 #include "clang/AST/StmtNodes.inc"
   default:
-    llvm_unreachable("Not a loop transformation");
+    llvm_unreachable("Not a loop transformation for canonical loop nests");
   }
 }
 
@@ -392,7 +411,34 @@ Stmt *OMPCanonicalLoopNestTransformationDirective::getPreInits() const {
     return static_cast<const CLASS *>(this)->getPreInits();
 #include "clang/AST/StmtNodes.inc"
   default:
-    llvm_unreachable("Not a loop transformation");
+    llvm_unreachable("Not a loop transformation for canonical loop nests");
+  }
+}
+
+Stmt *
+OMPCanonicalLoopSequenceTransformationDirective::getTransformedStmt() const {
+  switch (getStmtClass()) {
+#define STMT(CLASS, PARENT)
+#define ABSTRACT_STMT(CLASS)
+#define OMPCANONICALLOOPSEQUENCETRANSFORMATIONDIRECTIVE(CLASS, PARENT)         \
+  case Stmt::CLASS##Class:                                                     \
+    return static_cast<const CLASS *>(this)->getTransformedStmt();
+#include "clang/AST/StmtNodes.inc"
+  default:
+    llvm_unreachable("Not a loop transformation for canonical loop sequences");
+  }
+}
+
+Stmt *OMPCanonicalLoopSequenceTransformationDirective::getPreInits() const {
+  switch (getStmtClass()) {
+#define STMT(CLASS, PARENT)
+#define ABSTRACT_STMT(CLASS)
+#define OMPCANONICALLOOPSEQUENCETRANSFORMATIONDIRECTIVE(CLASS, PARENT)         \
+  case Stmt::CLASS##Class:                                                     \
+    return static_cast<const CLASS *>(this)->getPreInits();
+#include "clang/AST/StmtNodes.inc"
+  default:
+    llvm_unreachable("Not a loop transformation for canonical loop sequences");
   }
 }
 
@@ -509,6 +555,29 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
       SourceLocation(), SourceLocation(), NumLoops);
 }
 
+OMPFuseDirective *OMPFuseDirective::Create(
+    const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+    ArrayRef<OMPClause *> Clauses, unsigned NumLoops, unsigned NumLoopsFused,
+    Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits) {
+
+  OMPFuseDirective *Dir = createDirective<OMPFuseDirective>(
+      C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc,
+      NumLoops);
+  Dir->setTransformedStmt(TransformedStmt);
+  Dir->setPreInits(PreInits);
+  Dir->setNumGeneratedLoops(NumLoopsFused);
+  return Dir;
+}
+
+OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C,
+                                                unsigned NumClauses,
+                                                unsigned NumLoops) {
+  OMPFuseDirective *Dir = createEmptyDirective<OMPFuseDirective>(
+      C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1,
+      SourceLocation(), SourceLocation(), NumLoops);
+  return Dir;
+}
+
 OMPForSimdDirective *
 OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
                             SourceLocation EndLoc, unsigned CollapsedNum,
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index afccba8778fd2..7f1501507c6c7 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -786,6 +786,11 @@ void StmtPrinter::VisitOMPInterchangeDirective(OMPInterchangeDirective *Node) {
   PrintOMPExecutableDirective(Node);
 }
 
+void StmtPrinter::VisitOMPFuseDirective(OMPFuseDirective *Node) {
+  Indent() << "#pragma omp fuse";
+  PrintOMPExecutableDirective(Node);
+}
+
 void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) {
   Indent() << "#pragma omp for";
   PrintOMPExecutableDirective(Node);
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 7a9b7fb431099..5d99dab38f15d 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -510,6 +510,13 @@ void OMPClauseProfiler::VisitOMPPartialClause(const OMPPartialClause *C) {
     Profiler->VisitExpr(Factor);
 }
 
+void OMPClauseProfiler::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) {
+  if (const Expr *First = C->getFirst())
+    Profiler->VisitExpr(First);
+  if (const Expr *Count = C->getCount())
+    Profiler->VisitExpr(Count);
+}
+
 void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) {
   if (C->getAllocator())
     Profiler->VisitStmt(C->getAllocator());
@@ -1025,6 +1032,15 @@ void StmtProfiler::VisitOMPInterchangeDirective(
   VisitOMPCanonicalLoopNestTransformationDirective(S);
 }
 
+void StmtProfiler::VisitOMPCanonicalLoopSequenceTransformationDirective(
+    const OMPCanonicalLoopSequenceTransformationDirective *S) {
+  VisitOMPExecutableDirective(S);
+}
+
+void StmtProfiler::VisitOMPFuseDirective(const OMPFuseDirective *S) {
+  VisitOMPCanonicalLoopSequenceTransformationDirective(S);
+}
+
 void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) {
   VisitOMPLoopDirective(S);
 }
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index 3f8f64df8702e..246a569940155 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -256,6 +256,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str,
   case OMPC_affinity:
   case OMPC_when:
   case OMPC_append_args:
+  case OMPC_looprange:
     break;
   default:
     break;
@@ -600,6 +601,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind,
   case OMPC_affinity:
   case OMPC_when:
   case OMPC_append_args:
+  case OMPC_looprange:
     break;
   default:
     break;
@@ -723,9 +725,14 @@ bool clang::isOpenMPCanonicalLoopNestTransformationDirective(
          DKind == OMPD_interchange || DKind == OMPD_stripe;
 }
 
+bool clang::isOpenMPCanonicalLoopSequenceTransformationDirective(
+    OpenMPDirectiveKind DKind) {
+  return DKind == OMPD_fuse;
+}
+
 bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) {
-  // FIXME: There will be more cases when we implement 'fuse'.
-  return isOpenMPCanonicalLoopNestTransformationDirective(DKind);
+  return isOpenMPCanonicalLoopNestTransformationDirective(DKind) ||
+         isOpenMPCanonicalLoopSequenceTransformationDirective(DKind);
 }
 
 bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) {
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 031ef73214e76..f134dab9387a2 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -234,6 +234,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
   case Stmt::OMPInterchangeDirectiveClass:
     EmitOMPInterchangeDirective(cast<OMPInterchangeDirective>(*S));
     break;
+  case Stmt::OMPFuseDirectiveClass:
+    EmitOMPFuseDirective(cast<OMPFuseDirective>(*S));
+    break;
   case Stmt::OMPForDirectiveClass:
     EmitOMPForDirective(cast<OMPForDirective>(*S));
     break;
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6f795b45bc381..db96eae60930b 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -201,6 +201,24 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
     } else {
       llvm_unreachable("Unknown loop-based directive kind.");
     }
+    doEmitPreinits(PreInits);
+    PreCondVars.restore(CGF);
+  }
+
+  void
+  emitPreInitStmt(CodeGenFunction &CGF,
+                  const OMPCanonicalLoopSequenceTransformationDirective &S) {
+    const Stmt *PreInits;
+    if (const auto *Fuse = dyn_cast<OMPFuseDirective>(&S)) {
+      PreInits = Fuse->getPreInits();
+    } else {
+      llvm_unreachable(
+          "Unknown canonical loop sequence transform directive kind.");
+    }
+    doEmitPreinits(PreInits);
+  }
+
+  void doEmitPreinits(const Stmt *PreInits) {
     if (PreInits) {
       // CompoundStmts and DeclStmts are used as lists of PreInit statements and
       // declarations. Since declarations must be visible in the the following
@@ -222,7 +240,6 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
         CGF.EmitStmt(S);
       }
     }
-    PreCondVars.restore(CGF);
   }
 
 public:
@@ -230,6 +247,11 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
       : CodeGenFunction::RunCleanupsScope(CGF) {
     emitPreInitStmt(CGF, S);
   }
+  OMPLoopScope(CodeGenFunction &CGF,
+               const OMPCanonicalLoopSequenceTransformationDirective &S)
+      : CodeGenFunction::RunCleanupsScope(CGF) {
+    emitPreInitStmt(CGF, S);
+  }
 };
 
 class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
@@ -1900,6 +1922,15 @@ class OMPTransformDirectiveScopeRAII {
       CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
       CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
     }
+    if (const auto *Dir =
+            dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(S)) {
+      // For simplicity we reuse the loop scope similarly to what we do with
+      // OMPCanonicalLoopNestTransformationDirective do by being a subclass
+      // of OMPLoopBasedDirective.
+      Scope = new OMPLoopScope(CGF, *Dir);
+      CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
+      CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
+    }
   }
   ~OMPTransformDirectiveScopeRAII() {
     if (!Scope)
@@ -1927,8 +1958,7 @@ static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
     return;
   }
   if (SimplifiedS == NextLoop) {
-    if (auto *Dir =
-            dyn_cast<OMPCanonicalLoopNestTransformationDirective>(SimplifiedS))
+    if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
       SimplifiedS = Dir->getTransformedStmt();
     if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
       SimplifiedS = CanonLoop->getLoopStmt();
@@ -2923,6 +2953,12 @@ void CodeGenFunction::EmitOMPInterchangeDirective(
   EmitStmt(S.getTransformedStmt());
 }
 
+void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) {
+  // Emit the de-sugared statement
+  OMPTransformDirectiveScopeRAII FuseScope(*this, &S);
+  EmitStmt(S.getTransformedStmt());
+}
+
 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
   bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index a562a6a1ea6e1..301ada0591789 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3856,6 +3856,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   void EmitOMPUnrollDirective(const OMPUnrollDirective &S);
   void EmitOMPReverseDirective(const OMPReverseDirective &S);
   void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S);
+  void EmitOMPFuseDirective(const OMPFuseDirective &S);
   void EmitOMPForDirective(const OMPForDirective &S);
   void EmitOMPForSimdDirective(const OMPForSimdDirective &S);
   void EmitOMPScopeDirective(const OMPScopeDirective &S);
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 5db2f2e2ccf86..d48e6436ba9aa 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -2937,6 +2937,39 @@ OMPClause *Parser::ParseOpenMPSizesClause() {
                                                  OpenLoc, CloseLoc);
 }
 
+OMPClause *Parser::ParseOpenMPLoopRangeClause() {
+  SourceLocation ClauseNameLoc = ConsumeToken();
+  SourceLocation FirstLoc, CountLoc;
+
+  BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
+  if (T.consumeOpen()) {
+    Diag(Tok, diag::err_expected) << tok::l_paren;
+    return nullptr;
+  }
+
+  FirstLoc = Tok.getLocation();
+  ExprResult FirstVal = ParseConstantExpression();
+  if (!FirstVal.isUsable()) {
+    T.skipToEnd();
+    return nullptr;
+  }
+
+  ExpectAndConsume(tok::comma);
+
+  CountLoc = Tok.getLocation();
+  ExprResult CountVal = ParseConstantExpression();
+  if (!CountVal.isUsable()) {
+    T.skipToEnd();
+    return nullptr;
+  }
+
+  T.consumeClose();
+
+  return Actions.OpenMP().ActOnOpenMPLoopRangeClause(
+      FirstVal.get(), CountVal.get(), ClauseNameLoc, T.getOpenLocation(),
+      FirstLoc, CountLoc, T.getCloseLocation());
+}
+
 OMPClause *Parser::ParseOpenMPPermutationClause() {
   SourceLocation ClauseNameLoc, OpenLoc, CloseLoc;
   SmallVector<Expr *> ArgExprs;
@@ -3366,6 +3399,9 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
     }
     Clause = ParseOpenMPClause(CKind, WrongDirective);
     break;
+  case OMPC_looprange:
+    Clause = ParseOpenMPLoopRangeClause();
+    break;
   default:
     break;
   }
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index 552c92996dc2e..a0483c3027199 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1493,6 +1493,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
   case Stmt::OMPUnrollDirectiveClass:
   case Stmt::OMPReverseDirectiveClass:
   case Stmt::OMPInterchangeDirectiveClass:
+  case Stmt::OMPFuseDirectiveClass:
   case Stmt::OMPSingleDirectiveClass:
   case Stmt::OMPTargetDataDirectiveClass:
   case Stmt::OMPTargetDirectiveClass:
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index a02850c66b4fe..671f9c8459c48 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4405,6 +4405,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind,
   case OMPD_unroll:
   case OMPD_reverse:
   case OMPD_interchange:
+  case OMPD_fuse:
   case OMPD_assume:
     break;
   default:
@@ -6222,6 +6223,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
     Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc,
                                           EndLoc);
     break;
+  case OMPD_fuse:
+    Res =
+        ActOnOpenMPFuseDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc);
+    break;
   case OMPD_for:
     Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc,
                                   VarsWithInheritedDSA);
@@ -9299,7 +9304,9 @@ static bool checkOpenMPIterationSpace(
     // sharing attributes.
     VarsWithImplicitDSA.erase(LCDecl);
 
-    assert(isOpenMPLoopDirective(DKind) && "DSA for non-loop vars");
+    assert((isOpenMPLoopDirective(DKind) ||
+            isOpenMPCanonicalLoopSequenceTransformationDirective(DKind)) &&
+           "DSA for non-loop vars");
 
     // Check test-expr.
     HasErrors |= ISC.checkAndSetCond(For ? For->getCond() : CXXFor->getCond());
@@ -9727,7 +9734,8 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
   unsigned NumLoops = std::max(OrderedLoopCount, NestedLoopCount);
   SmallVector<LoopIterationSpace, 4> IterSpaces(NumLoops);
   if (!OMPLoopBasedDirective::doForAllLoops(
-          AStmt->IgnoreContainers(!isOpenMPLoopTransformationDirective(DKind)),
+          AStmt->IgnoreContainers(
+              !isOpenMPCanonicalLoopNestTransformationDirective(DKind)),
           SupportsNonPerfectlyNested, NumLoops,
           [DKind, &SemaRef, &DSA, NumLoops, NestedLoopCount,
            CollapseLoopCountExpr, OrderedLoopCountExpr, &VarsWithImplicitDSA,
@@ -9749,8 +9757,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
             }
             return false;
           },
-          [&SemaRef,
-           &Captures](OMPCanonicalLoopNestTransformationDirective *Transform) {
+          [&SemaRef, &Captures](OMPLoopTransformationDirective *Transform) {
             Stmt *DependentPreInits = Transform->getPreInits();
             if (!DependentPreInits)
               return;
@@ -9765,7 +9772,8 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
                   auto *D = cast<VarDecl>(C);
                   DeclRefExpr *Ref = buildDeclRefExpr(
                       SemaRef, D, D->getType().getNonReferenceType(),
-                      Transform->getBeginLoc());
+                      cast<OMPExecutableDirective>(Transform->getDirective())
+                          ->getBeginLoc());
                   Captures[Ref] = Ref;
                 }
               }
@@ -14210,10 +14218,34 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective(
       getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
 }
 
+/// Updates OriginalInits by checking Transform against loop transformation
+/// directives and appending their pre-inits if a match is found.
+static void updatePreInits(OMPLoopTransformationDirective *Transform,
+                           SmallVectorImpl<Stmt *> &PreInits) {
+  Stmt *Dir = Transform->getDirective();
+  switch (Dir->getStmtClass()) {
+#define STMT(CLASS, PARENT)
+#define ABSTRACT_STMT(CLASS)
+#define COMMON_OMP_LOOP_TRANSFORMATION(CLASS, PARENT)                          \
+  case Stmt::CLASS##Class:                                                     \
+    appendFlattenedStmtList(PreInits,                                          \
+                            static_cast<const CLASS *>(Dir)->getPreInits());   \
+    break;
+#define OMPCANONICALLOOPNESTTRANSFORMATIONDIRECTIVE(CLASS, PARENT)             \
+  COMMON_OMP_LOOP_TRANSFORMATION(CLASS, PARENT)
+#define OMPCANONICALLOOPSEQUENCETRANSFORMATIONDIRECTIVE(CLASS, PARENT)         \
+  COMMON_OMP_LOOP_TRANSFORMATION(CLASS, PARENT)
+#include "clang/AST/StmtNodes.inc"
+#undef COMMON_OMP_LOOP_TRANSFORMATION
+  default:
+    llvm_unreachable("Not a loop transformation");
+  }
+}
+
 bool SemaOpenMP::checkTransformableLoopNest(
     OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
     SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
-    Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits) {
+    Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits) {
   OriginalInits.emplace_back();
   bool Result = OMPLoopBasedDirective::doForAllLoops(
       AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false, NumLoops,
@@ -14239,29 +14271,282 @@ bool SemaOpenMP::checkTransformableLoopNest(
         OriginalInits.emplace_back();
         return false;
       },
-      [&OriginalInits](OMPLoopBasedDirective *Transform) {
-        Stmt *DependentPreInits;
-        if (auto *Dir = dyn_cast<OMPTileDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else if (auto *Dir = dyn_cast<OMPStripeDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else if (auto *Dir = dyn_cast<OMPReverseDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else
-          llvm_unreachable("Unhandled loop transformation");
-
-        appendFlattenedStmtList(OriginalInits.back(), DependentPreInits);
+      [&OriginalInits](OMPLoopTransformationDirective *Transform) {
+        updatePreInits(Transform, OriginalInits.back());
       });
   assert(OriginalInits.back().empty() && "No preinit after innermost loop");
   OriginalInits.pop_back();
   return Result;
 }
 
-/// Add preinit statements that need to be propageted from the selected loop.
+/// Counts the total number of OpenMP canonical nested loops, including the
+/// outermost loop (the original loop). PRECONDITION of this visitor is that it
+/// must be invoked from the original loop to be analyzed. The traversal stops
+/// for Decl's and Expr's given that they may contain inner loops that must not
+/// be counted.
+///
+/// Example AST structure for the code:
+///
+/// int main() {
+///     #pragma omp fuse
+///     {
+///         for (int i = 0; i < 100; i++) {    <-- Outer loop
+///             []() {
+///                 for(int j = 0; j < 100; j++) {}  <-- NOT A LOOP (1)
+///             };
+///             for(int j = 0; j < 5; ++j) {}    <-- Inner loop
+///         }
+///         for (int r = 0; i < 100; i++) {    <-- Outer loop
+///             struct LocalClass {
+///                 void bar() {
+///                     for(int j = 0; j < 100; j++) {}  <-- NOT A LOOP (2)
+///                 }
+///             };
+///             for(int k = 0; k < 10; ++k) {}    <-- Inner loop
+///             {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP (3)
+///         }
+///     }
+/// }
+/// (1) because in a different function (here: a lambda)
+/// (2) because in a different function (here: class method)
+/// (3) because considered to be intervening-code of non-perfectly nested loop
+/// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops.
+class NestedLoopCounterVisitor final : public DynamicRecursiveASTVisitor {
+private:
+  unsigned NestedLoopCount = 0;
+
+public:
+  explicit NestedLoopCounterVisitor() = default;
+
+  unsigned getNestedLoopCount() const { return NestedLoopCount; }
+
+  bool VisitForStmt(ForStmt *FS) override {
+    ++NestedLoopCount;
+    return true;
+  }
+
+  bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override {
+    ++NestedLoopCount;
+    return true;
+  }
+
+  bool TraverseStmt(Stmt *S) override {
+    if (!S)
+      return true;
+
+    // Skip traversal of all expressions, including special cases like
+    // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions
+    // may contain inner statements (and even loops), but they are not part
+    // of the syntactic body of the surrounding loop structure.
+    //  Therefore must not be counted.
+    if (isa<Expr>(S))
+      return true;
+
+    // Only recurse into CompoundStmt (block {}) and loop bodies.
+    if (isa<CompoundStmt, ForStmt, CXXForRangeStmt>(S)) {
+      return DynamicRecursiveASTVisitor::TraverseStmt(S);
+    }
+
+    // Stop traversal of the rest of statements, that break perfect
+    // loop nesting, such as control flow (IfStmt, SwitchStmt...).
+    return true;
+  }
+
+  bool TraverseDecl(Decl *D) override {
+    // Stop in the case of finding a declaration, it is not important
+    // in order to find nested loops (Possible CXXRecordDecl, RecordDecl,
+    // FunctionDecl...).
+    return true;
+  }
+};
+
+bool SemaOpenMP::analyzeLoopSequence(Stmt *LoopSeqStmt,
+                                     LoopSequenceAnalysis &SeqAnalysis,
+                                     ASTContext &Context,
+                                     OpenMPDirectiveKind Kind) {
+  VarsWithInheritedDSAType TmpDSA;
+  // Helper Lambda to handle storing initialization and body statements for
+  // both ForStmt and CXXForRangeStmt.
+  auto StoreLoopStatements = [](LoopAnalysis &Analysis, Stmt *LoopStmt) {
+    if (auto *For = dyn_cast<ForStmt>(LoopStmt)) {
+      Analysis.OriginalInits.push_back(For->getInit());
+      Analysis.ForStmt = For;
+    } else {
+      auto *CXXFor = cast<CXXForRangeStmt>(LoopStmt);
+      Analysis.OriginalInits.push_back(CXXFor->getBeginStmt());
+      Analysis.ForStmt = CXXFor;
+    }
+  };
+
+  // Helper lambda functions to encapsulate the processing of different
+  // derivations of the canonical loop sequence grammar
+  // Modularized code for handling loop generation and transformations.
+  auto AnalyzeLoopGeneration = [&](Stmt *Child) {
+    auto *LoopTransform = cast<OMPLoopTransformationDirective>(Child);
+    Stmt *TransformedStmt = LoopTransform->getTransformedStmt();
+    unsigned NumGeneratedLoops = LoopTransform->getNumGeneratedLoops();
+    // Handle the case where transformed statement is not available due to
+    // dependent contexts
+    if (!TransformedStmt) {
+      if (NumGeneratedLoops > 0) {
+        SeqAnalysis.LoopSeqSize += NumGeneratedLoops;
+        return true;
+      }
+      // Unroll full (0 loops produced)
+      Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+          << 0 << getOpenMPDirectiveName(Kind);
+      return false;
+    }
+    // Handle loop transformations with multiple loop nests
+    // Unroll full
+    if (!NumGeneratedLoops) {
+      Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+          << 0 << getOpenMPDirectiveName(Kind);
+      return false;
+    }
+    // Loop transformatons such as split or loopranged fuse
+    if (NumGeneratedLoops > 1) {
+      // Get the preinits related to this loop sequence generating
+      // loop transformation (i.e loopranged fuse, split...)
+      // These preinits differ slightly from regular inits/pre-inits related
+      // to single loop generating loop transformations (interchange, unroll)
+      // given that they are not bounded to a particular loop nest
+      // so they need to be treated independently
+      updatePreInits(LoopTransform, SeqAnalysis.LoopSequencePreInits);
+      return analyzeLoopSequence(TransformedStmt, SeqAnalysis, Context, Kind);
+    }
+    // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all)
+    // Process the transformed loop statement
+    LoopAnalysis &NewTransformedSingleLoop =
+        SeqAnalysis.Loops.emplace_back(OMPLoopCategory::TransformSingleLoop);
+    unsigned IsCanonical = checkOpenMPLoop(
+        Kind, nullptr, nullptr, TransformedStmt, SemaRef, *DSAStack, TmpDSA,
+        NewTransformedSingleLoop.HelperExprs);
+
+    if (!IsCanonical) {
+      Diag(TransformedStmt->getBeginLoc(), diag::err_omp_not_canonical_loop)
+          << getOpenMPDirectiveName(Kind);
+      return false;
+    }
+    StoreLoopStatements(NewTransformedSingleLoop, TransformedStmt);
+    updatePreInits(LoopTransform, NewTransformedSingleLoop.TransformsPreInits);
+
+    SeqAnalysis.LoopSeqSize++;
+    return true;
+  };
+
+  // Modularized code for handling regular canonical loops.
+  auto AnalyzeRegularLoop = [&](Stmt *Child) {
+    LoopAnalysis &NewRegularLoop =
+        SeqAnalysis.Loops.emplace_back(OMPLoopCategory::RegularLoop);
+    unsigned IsCanonical =
+        checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack,
+                        TmpDSA, NewRegularLoop.HelperExprs);
+
+    if (!IsCanonical) {
+      Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop)
+          << getOpenMPDirectiveName(Kind);
+      return false;
+    }
+
+    StoreLoopStatements(NewRegularLoop, Child);
+    NestedLoopCounterVisitor NLCV;
+    NLCV.TraverseStmt(Child);
+    return true;
+  };
+
+  // Helper functions to validate loop sequence grammar derivations.
+  auto IsLoopSequenceDerivation = [](auto *Child) {
+    return isa<ForStmt, CXXForRangeStmt, OMPLoopTransformationDirective>(Child);
+  };
+  // Helper functions to validate loop generating grammar derivations.
+  auto IsLoopGeneratingStmt = [](auto *Child) {
+    return isa<OMPLoopTransformationDirective>(Child);
+  };
+
+  // High level grammar validation.
+  for (Stmt *Child : LoopSeqStmt->children()) {
+    if (!Child)
+      continue;
+    // Skip over non-loop-sequence statements.
+    if (!IsLoopSequenceDerivation(Child)) {
+      Child = Child->IgnoreContainers();
+      // Ignore empty compound statement.
+      if (!Child)
+        continue;
+      // In the case of a nested loop sequence ignoring containers would not
+      // be enough, a recurisve transversal of the loop sequence is required.
+      if (isa<CompoundStmt>(Child)) {
+        if (!analyzeLoopSequence(Child, SeqAnalysis, Context, Kind))
+          return false;
+        // Already been treated, skip this children
+        continue;
+      }
+    }
+    // Regular loop sequence handling.
+    if (IsLoopSequenceDerivation(Child)) {
+      if (IsLoopGeneratingStmt(Child)) {
+        if (!AnalyzeLoopGeneration(Child))
+          return false;
+        // AnalyzeLoopGeneration updates SeqAnalysis.LoopSeqSize accordingly.
+      } else {
+        if (!AnalyzeRegularLoop(Child))
+          return false;
+        SeqAnalysis.LoopSeqSize++;
+      }
+    } else {
+      // Report error for invalid statement inside canonical loop sequence.
+      Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+          << 0 << getOpenMPDirectiveName(Kind);
+      return false;
+    }
+  }
+  return true;
+}
+
+bool SemaOpenMP::checkTransformableLoopSequence(
+    OpenMPDirectiveKind Kind, Stmt *AStmt, LoopSequenceAnalysis &SeqAnalysis,
+    ASTContext &Context) {
+  // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows
+  // the grammar:
+  //
+  // canonical-loop-sequence:
+  //  {
+  //    loop-sequence+
+  //  }
+  // where loop-sequence can be any of the following:
+  // 1. canonical-loop-sequence
+  // 2. loop-nest
+  // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective)
+  //
+  // To recognise and traverse this structure the helper function
+  // analyzeLoopSequence serves as the recurisve entry point
+  // and tries to match the input AST to the canonical loop sequence grammar
+  // structure. This function will perform both a semantic and syntactical
+  // analysis of the given statement according to OpenMP 6.0 definition of
+  // the aforementioned canonical loop sequence.
+
+  // We expect an outer compound statement.
+  if (!isa<CompoundStmt>(AStmt)) {
+    Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
+        << getOpenMPDirectiveName(Kind);
+    return false;
+  }
+
+  // Recursive entry point to process the main loop sequence
+  if (!analyzeLoopSequence(AStmt, SeqAnalysis, Context, Kind))
+    return false;
+
+  // Diagnose an empty loop sequence.
+  if (SeqAnalysis.LoopSeqSize <= 0) {
+    Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence)
+        << getOpenMPDirectiveName(Kind);
+    return false;
+  }
+  return true;
+}
+
+/// Add preinit statements that need to be propagated from the selected loop.
 static void addLoopPreInits(ASTContext &Context,
                             OMPLoopBasedDirective::HelperExprs &LoopHelper,
                             Stmt *LoopStmt, ArrayRef<Stmt *> OriginalInit,
@@ -14346,7 +14631,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
   // Verify and diagnose loop nest.
   SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
   Stmt *Body = nullptr;
-  SmallVector<SmallVector<Stmt *, 0>, 4> OriginalInits;
+  SmallVector<SmallVector<Stmt *>, 4> OriginalInits;
   if (!checkTransformableLoopNest(OMPD_tile, AStmt, NumLoops, LoopHelpers, Body,
                                   OriginalInits))
     return StmtError();
@@ -14623,7 +14908,7 @@ StmtResult SemaOpenMP::ActOnOpenMPStripeDirective(ArrayRef<OMPClause *> Clauses,
   // Verify and diagnose loop nest.
   SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
   Stmt *Body = nullptr;
-  SmallVector<SmallVector<Stmt *, 0>, 4> OriginalInits;
+  SmallVector<SmallVector<Stmt *>, 4> OriginalInits;
   if (!checkTransformableLoopNest(OMPD_stripe, AStmt, NumLoops, LoopHelpers,
                                   Body, OriginalInits))
     return StmtError();
@@ -14884,7 +15169,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
   Stmt *Body = nullptr;
   SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers(
       NumLoops);
-  SmallVector<SmallVector<Stmt *, 0>, NumLoops + 1> OriginalInits;
+  SmallVector<SmallVector<Stmt *>, NumLoops + 1> OriginalInits;
   if (!checkTransformableLoopNest(OMPD_unroll, AStmt, NumLoops, LoopHelpers,
                                   Body, OriginalInits))
     return StmtError();
@@ -15152,7 +15437,7 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt,
   Stmt *Body = nullptr;
   SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers(
       NumLoops);
-  SmallVector<SmallVector<Stmt *, 0>, NumLoops + 1> OriginalInits;
+  SmallVector<SmallVector<Stmt *>, NumLoops + 1> OriginalInits;
   if (!checkTransformableLoopNest(OMPD_reverse, AStmt, NumLoops, LoopHelpers,
                                   Body, OriginalInits))
     return StmtError();
@@ -15344,7 +15629,7 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective(
   // Verify and diagnose loop nest.
   SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
   Stmt *Body = nullptr;
-  SmallVector<SmallVector<Stmt *, 0>, 2> OriginalInits;
+  SmallVector<SmallVector<Stmt *>, 2> OriginalInits;
   if (!checkTransformableLoopNest(OMPD_interchange, AStmt, NumLoops,
                                   LoopHelpers, Body, OriginalInits))
     return StmtError();
@@ -15520,6 +15805,490 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective(
                                          buildPreInits(Context, PreInits));
 }
 
+StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
+                                                Stmt *AStmt,
+                                                SourceLocation StartLoc,
+                                                SourceLocation EndLoc) {
+
+  ASTContext &Context = getASTContext();
+  DeclContext *CurrContext = SemaRef.CurContext;
+  Scope *CurScope = SemaRef.getCurScope();
+  CaptureVars CopyTransformer(SemaRef);
+
+  // Ensure the structured block is not empty
+  if (!AStmt)
+    return StmtError();
+
+  // Defer transformation in dependent contexts
+  // The NumLoopNests argument is set to a placeholder 1 (even though
+  // using looprange fuse could yield up to 3 top level loop nests)
+  // because a dependent context could prevent determining its true value
+  if (CurrContext->isDependentContext()) {
+    return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
+                                    /* NumLoops */ 1, /* LoopSeqSize */ 1,
+                                    AStmt, nullptr, nullptr);
+  }
+
+  // Validate that the potential loop sequence is transformable for fusion
+  // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops
+  LoopSequenceAnalysis SeqAnalysis;
+  if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, SeqAnalysis, Context))
+    return StmtError();
+
+  // SeqAnalysis.LoopSeqSize exists mostly to handle dependent contexts,
+  // otherwise it must be the same as SeqAnalysis.Loops.size().
+  assert(SeqAnalysis.LoopSeqSize == SeqAnalysis.Loops.size());
+
+  // Handle clauses, which can be any of the following: [looprange, apply]
+  const OMPLoopRangeClause *LRC =
+      OMPExecutableDirective::getSingleClause<OMPLoopRangeClause>(Clauses);
+
+  // The clause arguments are invalidated if any error arises
+  // such as non-constant or non-positive arguments
+  if (LRC && (!LRC->getFirst() || !LRC->getCount()))
+    return StmtError();
+
+  // Delayed semantic check of LoopRange constraint
+  // Evaluates the loop range arguments and returns the first and count values
+  auto EvaluateLoopRangeArguments = [&Context](Expr *First, Expr *Count,
+                                               uint64_t &FirstVal,
+                                               uint64_t &CountVal) {
+    llvm::APSInt FirstInt = First->EvaluateKnownConstInt(Context);
+    llvm::APSInt CountInt = Count->EvaluateKnownConstInt(Context);
+    FirstVal = FirstInt.getZExtValue();
+    CountVal = CountInt.getZExtValue();
+  };
+
+  // OpenMP [6.0, Restrictions]
+  // first + count - 1 must not evaluate to a value greater than the
+  // loop sequence length of the associated canonical loop sequence.
+  auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal,
+                           unsigned NumLoops) -> bool {
+    return FirstVal + CountVal - 1 <= NumLoops;
+  };
+  uint64_t FirstVal = 1, CountVal = 0, LastVal = SeqAnalysis.LoopSeqSize;
+
+  // Validates the loop range after evaluating the semantic information
+  // and ensures that the range is valid for the given loop sequence size.
+  // Expressions are evaluated at compile time to obtain constant values.
+  if (LRC) {
+    EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal,
+                               CountVal);
+    if (CountVal == 1)
+      SemaRef.Diag(LRC->getCountLoc(), diag::warn_omp_redundant_fusion)
+          << getOpenMPDirectiveName(OMPD_fuse);
+
+    if (!ValidLoopRange(FirstVal, CountVal, SeqAnalysis.LoopSeqSize)) {
+      SemaRef.Diag(LRC->getFirstLoc(), diag::err_omp_invalid_looprange)
+          << getOpenMPDirectiveName(OMPD_fuse) << FirstVal
+          << (FirstVal + CountVal - 1) << SeqAnalysis.LoopSeqSize;
+      return StmtError();
+    }
+
+    LastVal = FirstVal + CountVal - 1;
+  }
+
+  // Complete fusion generates a single canonical loop nest
+  // However looprange clause may generate several loop nests
+  unsigned NumLoopNests = LRC ? SeqAnalysis.LoopSeqSize - CountVal + 1 : 1;
+
+  // Emit a warning for redundant loop fusion when the sequence contains only
+  // one loop.
+  if (SeqAnalysis.LoopSeqSize == 1)
+    SemaRef.Diag(AStmt->getBeginLoc(), diag::warn_omp_redundant_fusion)
+        << getOpenMPDirectiveName(OMPD_fuse);
+
+  // Select the type with the largest bit width among all induction variables
+  QualType IVType =
+      SeqAnalysis.Loops[FirstVal - 1].HelperExprs.IterationVarRef->getType();
+  for (unsigned I = FirstVal; I < LastVal; ++I) {
+    QualType CurrentIVType =
+        SeqAnalysis.Loops[I].HelperExprs.IterationVarRef->getType();
+    if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) {
+      IVType = CurrentIVType;
+    }
+  }
+  uint64_t IVBitWidth = Context.getIntWidth(IVType);
+
+  // Create pre-init declarations for all loops lower bounds, upper bounds,
+  // strides and num-iterations for every top level loop in the fusion
+  SmallVector<VarDecl *, 4> LBVarDecls;
+  SmallVector<VarDecl *, 4> STVarDecls;
+  SmallVector<VarDecl *, 4> NIVarDecls;
+  SmallVector<VarDecl *, 4> UBVarDecls;
+  SmallVector<VarDecl *, 4> IVVarDecls;
+
+  // Helper lambda to create variables for bounds, strides, and other
+  // expressions. Generates both the variable declaration and the corresponding
+  // initialization statement.
+  auto CreateHelperVarAndStmt =
+      [&, &SemaRef = SemaRef](Expr *ExprToCopy, const std::string &BaseName,
+                              unsigned I, bool NeedsNewVD = false) {
+        Expr *TransformedExpr =
+            AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy));
+        if (!TransformedExpr)
+          return std::pair<VarDecl *, StmtResult>(nullptr, StmtError());
+
+        auto Name = (Twine(".omp.") + BaseName + std::to_string(I)).str();
+
+        VarDecl *VD;
+        if (NeedsNewVD) {
+          VD = buildVarDecl(SemaRef, SourceLocation(), IVType, Name);
+          SemaRef.AddInitializerToDecl(VD, TransformedExpr, false);
+
+        } else {
+          // Create a unique variable name
+          DeclRefExpr *DRE = cast<DeclRefExpr>(TransformedExpr);
+          VD = cast<VarDecl>(DRE->getDecl());
+          VD->setDeclName(&SemaRef.PP.getIdentifierTable().get(Name));
+        }
+        // Create the corresponding declaration statement
+        StmtResult DeclStmt = new (Context) class DeclStmt(
+            DeclGroupRef(VD), SourceLocation(), SourceLocation());
+        return std::make_pair(VD, DeclStmt);
+      };
+
+  // PreInits hold a sequence of variable declarations that must be executed
+  // before the fused loop begins. These include bounds, strides, and other
+  // helper variables required for the transformation. Other loop transforms
+  // also contain their own preinits
+  SmallVector<Stmt *> PreInits;
+
+  //  Update the general preinits using the preinits generated by loop sequence
+  //  generating loop transformations. These preinits differ slightly from
+  //  single-loop transformation preinits, as they can be detached from a
+  //  specific loop inside multiple generated loop nests. This happens
+  //  because certain helper variables, like '.omp.fuse.max', are introduced to
+  //  handle fused iteration spaces and may not be directly tied to a single
+  //  original loop. The preinit structure must ensure that hidden variables
+  //  like '.omp.fuse.max' are still properly handled.
+  // Transformations that apply this concept: Loopranged Fuse, Split
+  if (!SeqAnalysis.LoopSequencePreInits.empty()) {
+    llvm::append_range(PreInits, SeqAnalysis.LoopSequencePreInits);
+  }
+
+  // Process each single loop to generate and collect declarations
+  // and statements for all helper expressions related to
+  // particular single loop nests
+
+  // Also In the case of the fused loops, we keep track of their original
+  // inits by appending them to their preinits statement, and in the case of
+  // transformations, also append their preinits (which contain the original
+  // loop initialization statement or other statements)
+
+  // Firstly we need to set TransformIndex to match the begining of the
+  // looprange section
+  unsigned int TransformIndex = 0;
+  for (unsigned I : llvm::seq<unsigned>(FirstVal - 1)) {
+    if (SeqAnalysis.Loops[I].Category == OMPLoopCategory::TransformSingleLoop)
+      ++TransformIndex;
+  }
+
+  for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
+    if (SeqAnalysis.Loops[I].Category == OMPLoopCategory::RegularLoop) {
+      addLoopPreInits(Context, SeqAnalysis.Loops[I].HelperExprs,
+                      SeqAnalysis.Loops[I].ForStmt,
+                      SeqAnalysis.Loops[I].OriginalInits, PreInits);
+    } else if (SeqAnalysis.Loops[I].Category ==
+               OMPLoopCategory::TransformSingleLoop) {
+      // For transformed loops, insert both pre-inits and original inits.
+      // Order matters: pre-inits may define variables used in the original
+      // inits such as upper bounds...
+      SmallVector<Stmt *> &TransformPreInit =
+          SeqAnalysis.Loops[TransformIndex++].TransformsPreInits;
+      if (!TransformPreInit.empty())
+        llvm::append_range(PreInits, TransformPreInit);
+
+      addLoopPreInits(Context, SeqAnalysis.Loops[I].HelperExprs,
+                      SeqAnalysis.Loops[I].ForStmt,
+                      SeqAnalysis.Loops[I].OriginalInits, PreInits);
+    }
+    auto [UBVD, UBDStmt] =
+        CreateHelperVarAndStmt(SeqAnalysis.Loops[I].HelperExprs.UB, "ub", J);
+    auto [LBVD, LBDStmt] =
+        CreateHelperVarAndStmt(SeqAnalysis.Loops[I].HelperExprs.LB, "lb", J);
+    auto [STVD, STDStmt] =
+        CreateHelperVarAndStmt(SeqAnalysis.Loops[I].HelperExprs.ST, "st", J);
+    auto [NIVD, NIDStmt] = CreateHelperVarAndStmt(
+        SeqAnalysis.Loops[I].HelperExprs.NumIterations, "ni", J, true);
+    auto [IVVD, IVDStmt] = CreateHelperVarAndStmt(
+        SeqAnalysis.Loops[I].HelperExprs.IterationVarRef, "iv", J);
+
+    assert(LBVD && STVD && NIVD && IVVD &&
+           "OpenMP Fuse Helper variables creation failed");
+
+    UBVarDecls.push_back(UBVD);
+    LBVarDecls.push_back(LBVD);
+    STVarDecls.push_back(STVD);
+    NIVarDecls.push_back(NIVD);
+    IVVarDecls.push_back(IVVD);
+
+    PreInits.push_back(LBDStmt.get());
+    PreInits.push_back(STDStmt.get());
+    PreInits.push_back(NIDStmt.get());
+    PreInits.push_back(IVDStmt.get());
+  }
+
+  auto MakeVarDeclRef = [&SemaRef = this->SemaRef](VarDecl *VD) {
+    return buildDeclRefExpr(SemaRef, VD, VD->getType(), VD->getLocation(),
+                            false);
+  };
+
+  // Following up the creation of the final fused loop will be performed
+  // which has the following shape (considering the selected loops):
+  //
+  // for (fuse.index = 0; fuse.index < max(ni0, ni1..., nik); ++fuse.index) {
+  //    if (fuse.index < ni0){
+  //      iv0 = lb0 + st0 * fuse.index;
+  //      original.index0 = iv0
+  //      body(0);
+  //    }
+  //    if (fuse.index < ni1){
+  //      iv1 = lb1 + st1 * fuse.index;
+  //      original.index1 = iv1
+  //      body(1);
+  //    }
+  //
+  //    ...
+  //
+  //    if (fuse.index < nik){
+  //      ivk = lbk + stk * fuse.index;
+  //      original.indexk = ivk
+  //      body(k);  Expr *InitVal = IntegerLiteral::Create(Context,
+  //      llvm::APInt(IVWidth, 0),
+  //    }
+
+  // 1. Create the initialized fuse index
+  StringRef IndexName = ".omp.fuse.index";
+  Expr *InitVal = IntegerLiteral::Create(Context, llvm::APInt(IVBitWidth, 0),
+                                         IVType, SourceLocation());
+  VarDecl *IndexDecl =
+      buildVarDecl(SemaRef, {}, IVType, IndexName, nullptr, nullptr);
+  SemaRef.AddInitializerToDecl(IndexDecl, InitVal, false);
+  StmtResult InitStmt = new (Context)
+      DeclStmt(DeclGroupRef(IndexDecl), SourceLocation(), SourceLocation());
+
+  if (!InitStmt.isUsable())
+    return StmtError();
+
+  auto MakeIVRef = [&SemaRef = this->SemaRef, IndexDecl, IVType,
+                    Loc = InitVal->getExprLoc()]() {
+    return buildDeclRefExpr(SemaRef, IndexDecl, IVType, Loc, false);
+  };
+
+  // 2. Iteratively compute the max number of logical iterations Max(NI_1, NI_2,
+  // ..., NI_k)
+  //
+  // This loop accumulates the maximum value across multiple expressions,
+  // ensuring each step constructs a unique AST node for correctness. By using
+  // intermediate temporary variables and conditional operators, we maintain
+  // distinct nodes and avoid duplicating subtrees,  For instance, max(a,b,c):
+  //   omp.temp0 = max(a, b)
+  //   omp.temp1 = max(omp.temp0, c)
+  //   omp.fuse.max = max(omp.temp1, omp.temp0)
+
+  ExprResult MaxExpr;
+  // I is the true
+  for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
+    DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[J]);
+    QualType NITy = NIRef->getType();
+
+    if (MaxExpr.isUnset()) {
+      // Initialize MaxExpr with the first NI expression
+      MaxExpr = NIRef;
+    } else {
+      // Create a new acummulator variable t_i = MaxExpr
+      std::string TempName = (Twine(".omp.temp.") + Twine(J)).str();
+      VarDecl *TempDecl =
+          buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr);
+      TempDecl->setInit(MaxExpr.get());
+      DeclRefExpr *TempRef =
+          buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false);
+      DeclRefExpr *TempRef2 =
+          buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false);
+      // Add a DeclStmt to PreInits to ensure the variable is declared.
+      StmtResult TempStmt = new (Context)
+          DeclStmt(DeclGroupRef(TempDecl), SourceLocation(), SourceLocation());
+
+      if (!TempStmt.isUsable())
+        return StmtError();
+      PreInits.push_back(TempStmt.get());
+
+      // Build MaxExpr <-(MaxExpr > NIRef ? MaxExpr : NIRef)
+      ExprResult Comparison =
+          SemaRef.BuildBinOp(nullptr, SourceLocation(), BO_GT, TempRef, NIRef);
+      // Handle any errors in Comparison creation
+      if (!Comparison.isUsable())
+        return StmtError();
+
+      DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[J]);
+      // Update MaxExpr using a conditional expression to hold the max value
+      MaxExpr = new (Context) ConditionalOperator(
+          Comparison.get(), SourceLocation(), TempRef2, SourceLocation(),
+          NIRef2->getExprStmt(), NITy, VK_LValue, OK_Ordinary);
+
+      if (!MaxExpr.isUsable())
+        return StmtError();
+    }
+  }
+  if (!MaxExpr.isUsable())
+    return StmtError();
+
+  // 3. Declare the max variable
+  const std::string MaxName = Twine(".omp.fuse.max").str();
+  VarDecl *MaxDecl =
+      buildVarDecl(SemaRef, {}, IVType, MaxName, nullptr, nullptr);
+  MaxDecl->setInit(MaxExpr.get());
+  DeclRefExpr *MaxRef = buildDeclRefExpr(SemaRef, MaxDecl, IVType, {}, false);
+  StmtResult MaxStmt = new (Context)
+      DeclStmt(DeclGroupRef(MaxDecl), SourceLocation(), SourceLocation());
+
+  if (MaxStmt.isInvalid())
+    return StmtError();
+  PreInits.push_back(MaxStmt.get());
+
+  // 4. Create condition Expr: index < n_max
+  ExprResult CondExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT,
+                                           MakeIVRef(), MaxRef);
+  if (!CondExpr.isUsable())
+    return StmtError();
+
+  // 5. Increment Expr: ++index
+  ExprResult IncrExpr =
+      SemaRef.BuildUnaryOp(CurScope, SourceLocation(), UO_PreInc, MakeIVRef());
+  if (!IncrExpr.isUsable())
+    return StmtError();
+
+  // 6. Build the Fused Loop Body
+  // The final fused loop iterates over the maximum logical range. Inside the
+  // loop, each original loop's index is calculated dynamically, and its body
+  // is executed conditionally.
+  //
+  // Each sub-loop's body is guarded by a conditional statement to ensure
+  // it executes only within its logical iteration range:
+  //
+  //    if (fuse.index < ni_k){
+  //      iv_k = lb_k + st_k * fuse.index;
+  //      original.index = iv_k
+  //      body(k);
+  //    }
+
+  CompoundStmt *FusedBody = nullptr;
+  SmallVector<Stmt *, 4> FusedBodyStmts;
+  for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
+    // Assingment of the original sub-loop index to compute the logical index
+    // IV_k = LB_k + omp.fuse.index * ST_k
+    ExprResult IdxExpr =
+        SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul,
+                           MakeVarDeclRef(STVarDecls[J]), MakeIVRef());
+    if (!IdxExpr.isUsable())
+      return StmtError();
+    IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add,
+                                 MakeVarDeclRef(LBVarDecls[J]), IdxExpr.get());
+
+    if (!IdxExpr.isUsable())
+      return StmtError();
+    IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign,
+                                 MakeVarDeclRef(IVVarDecls[J]), IdxExpr.get());
+    if (!IdxExpr.isUsable())
+      return StmtError();
+
+    // Update the original i_k = IV_k
+    SmallVector<Stmt *, 4> BodyStmts;
+    BodyStmts.push_back(IdxExpr.get());
+    llvm::append_range(BodyStmts, SeqAnalysis.Loops[I].HelperExprs.Updates);
+
+    // If the loop is a CXXForRangeStmt then the iterator variable is needed
+    if (auto *SourceCXXFor =
+            dyn_cast<CXXForRangeStmt>(SeqAnalysis.Loops[I].ForStmt))
+      BodyStmts.push_back(SourceCXXFor->getLoopVarStmt());
+
+    Stmt *Body =
+        (isa<ForStmt>(SeqAnalysis.Loops[I].ForStmt))
+            ? cast<ForStmt>(SeqAnalysis.Loops[I].ForStmt)->getBody()
+            : cast<CXXForRangeStmt>(SeqAnalysis.Loops[I].ForStmt)->getBody();
+    BodyStmts.push_back(Body);
+
+    CompoundStmt *CombinedBody =
+        CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(),
+                             SourceLocation(), SourceLocation());
+    ExprResult Condition =
+        SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(),
+                           MakeVarDeclRef(NIVarDecls[J]));
+
+    if (!Condition.isUsable())
+      return StmtError();
+
+    IfStmt *IfStatement = IfStmt::Create(
+        Context, SourceLocation(), IfStatementKind::Ordinary, nullptr, nullptr,
+        Condition.get(), SourceLocation(), SourceLocation(), CombinedBody,
+        SourceLocation(), nullptr);
+
+    FusedBodyStmts.push_back(IfStatement);
+  }
+  FusedBody = CompoundStmt::Create(Context, FusedBodyStmts, FPOptionsOverride(),
+                                   SourceLocation(), SourceLocation());
+
+  // 7. Construct the final fused loop
+  ForStmt *FusedForStmt = new (Context)
+      ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, IncrExpr.get(),
+              FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(),
+              IncrExpr.get()->getEndLoc());
+
+  //  In the case of looprange, the result of fuse won't simply
+  //  be a single loop (ForStmt), but rather a loop sequence
+  //  (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop
+  //  and the post-fusion loops, preserving its original order.
+  //
+  //  Note: If looprange clause produces a single fused loop nest then
+  //  this compound statement wrapper is unnecessary (Therefore this
+  //  treatment is skipped)
+
+  Stmt *FusionStmt = FusedForStmt;
+  if (LRC && CountVal != SeqAnalysis.LoopSeqSize) {
+    SmallVector<Stmt *, 4> FinalLoops;
+
+    // Reset the transform index
+    TransformIndex = 0;
+
+    // Collect all non-fused loops before and after the fused region.
+    // Pre-fusion and post-fusion loops are inserted in order exploiting their
+    // symmetry, along with their corresponding transformation pre-inits if
+    // needed. The fused loop is added between the two regions.
+    for (unsigned I = 0; I < SeqAnalysis.LoopSeqSize; ++I) {
+      if (I >= FirstVal - 1 && I < FirstVal + CountVal - 1) {
+        // Update the Transformation counter to skip already treated
+        // loop transformations
+        if (SeqAnalysis.Loops[I].Category !=
+            OMPLoopCategory::TransformSingleLoop)
+          ++TransformIndex;
+        continue;
+      }
+
+      // No need to handle:
+      // Regular loops: they are kept intact as-is.
+      // Loop-sequence-generating transformations: already handled earlier.
+      // Only TransformSingleLoop requires inserting pre-inits here
+      if (SeqAnalysis.Loops[I].Category ==
+          OMPLoopCategory::TransformSingleLoop) {
+        const auto &TransformPreInit =
+            SeqAnalysis.Loops[TransformIndex++].TransformsPreInits;
+        if (!TransformPreInit.empty())
+          llvm::append_range(PreInits, TransformPreInit);
+      }
+
+      FinalLoops.push_back(SeqAnalysis.Loops[I].ForStmt);
+    }
+
+    FinalLoops.insert(FinalLoops.begin() + (FirstVal - 1), FusedForStmt);
+    FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(),
+                                      SourceLocation(), SourceLocation());
+  }
+  return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
+                                  SeqAnalysis.LoopSeqSize, NumLoopNests, AStmt,
+                                  FusionStmt, buildPreInits(Context, PreInits));
+}
+
 OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind,
                                                    Expr *Expr,
                                                    SourceLocation StartLoc,
@@ -16649,6 +17418,31 @@ OMPClause *SemaOpenMP::ActOnOpenMPPartialClause(Expr *FactorExpr,
                                   FactorExpr);
 }
 
+OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause(
+    Expr *First, Expr *Count, SourceLocation StartLoc, SourceLocation LParenLoc,
+    SourceLocation FirstLoc, SourceLocation CountLoc, SourceLocation EndLoc) {
+
+  // OpenMP [6.0, Restrictions]
+  // First and Count must be integer expressions with positive value
+  ExprResult FirstVal =
+      VerifyPositiveIntegerConstantInClause(First, OMPC_looprange);
+  if (FirstVal.isInvalid())
+    First = nullptr;
+
+  ExprResult CountVal =
+      VerifyPositiveIntegerConstantInClause(Count, OMPC_looprange);
+  if (CountVal.isInvalid())
+    Count = nullptr;
+
+  // OpenMP [6.0, Restrictions]
+  // first + count - 1 must not evaluate to a value greater than the
+  // loop sequence length of the associated canonical loop sequence.
+  // This check must be performed afterwards due to the delayed
+  // parsing and computation of the associated loop sequence
+  return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc,
+                                    FirstLoc, CountLoc, EndLoc, First, Count);
+}
+
 OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc,
                                               SourceLocation LParenLoc,
                                               SourceLocation EndLoc) {
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 79aca11839802..368abaf0f8ff0 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1800,6 +1800,14 @@ class TreeTransform {
                                                        LParenLoc, EndLoc);
   }
 
+  OMPClause *
+  RebuildOMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc,
+                            SourceLocation LParenLoc, SourceLocation FirstLoc,
+                            SourceLocation CountLoc, SourceLocation EndLoc) {
+    return getSema().OpenMP().ActOnOpenMPLoopRangeClause(
+        First, Count, StartLoc, LParenLoc, FirstLoc, CountLoc, EndLoc);
+  }
+
   /// Build a new OpenMP 'allocator' clause.
   ///
   /// By default, performs semantic analysis to build the new OpenMP clause.
@@ -9689,6 +9697,17 @@ StmtResult TreeTransform<Derived>::TransformOMPInterchangeDirective(
   return Res;
 }
 
+template <typename Derived>
+StmtResult
+TreeTransform<Derived>::TransformOMPFuseDirective(OMPFuseDirective *D) {
+  DeclarationNameInfo DirName;
+  getDerived().getSema().OpenMP().StartOpenMPDSABlock(
+      D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc());
+  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+  getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get());
+  return Res;
+}
+
 template <typename Derived>
 StmtResult
 TreeTransform<Derived>::TransformOMPForDirective(OMPForDirective *D) {
@@ -10582,6 +10601,31 @@ TreeTransform<Derived>::TransformOMPPartialClause(OMPPartialClause *C) {
                                  C->getEndLoc());
 }
 
+template <typename Derived>
+OMPClause *
+TreeTransform<Derived>::TransformOMPLoopRangeClause(OMPLoopRangeClause *C) {
+  ExprResult F = getDerived().TransformExpr(C->getFirst());
+  if (F.isInvalid())
+    return nullptr;
+
+  ExprResult Cn = getDerived().TransformExpr(C->getCount());
+  if (Cn.isInvalid())
+    return nullptr;
+
+  Expr *First = F.get();
+  Expr *Count = Cn.get();
+
+  bool Changed = (First != C->getFirst()) || (Count != C->getCount());
+
+  // If no changes and AlwaysRebuild() is false, return the original clause
+  if (!Changed && !getDerived().AlwaysRebuild())
+    return C;
+
+  return RebuildOMPLoopRangeClause(First, Count, C->getBeginLoc(),
+                                   C->getLParenLoc(), C->getFirstLoc(),
+                                   C->getCountLoc(), C->getEndLoc());
+}
+
 template <typename Derived>
 OMPClause *
 TreeTransform<Derived>::TransformOMPCollapseClause(OMPCollapseClause *C) {
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 32f7779278286..915154b41982c 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -11229,6 +11229,9 @@ OMPClause *OMPClauseReader::readClause() {
   case llvm::omp::OMPC_partial:
     C = OMPPartialClause::CreateEmpty(Context);
     break;
+  case llvm::omp::OMPC_looprange:
+    C = OMPLoopRangeClause::CreateEmpty(Context);
+    break;
   case llvm::omp::OMPC_allocator:
     C = new (Context) OMPAllocatorClause();
     break;
@@ -11632,6 +11635,14 @@ void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) {
   C->setLParenLoc(Record.readSourceLocation());
 }
 
+void OMPClauseReader::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) {
+  C->setFirst(Record.readSubExpr());
+  C->setCount(Record.readSubExpr());
+  C->setLParenLoc(Record.readSourceLocation());
+  C->setFirstLoc(Record.readSourceLocation());
+  C->setCountLoc(Record.readSourceLocation());
+}
+
 void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
   C->setAllocator(Record.readExpr());
   C->setLParenLoc(Record.readSourceLocation());
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 13618b4a03d1e..f2c5873e99f16 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2464,10 +2464,23 @@ void ASTStmtReader::VisitOMPReverseDirective(OMPReverseDirective *D) {
   VisitOMPCanonicalLoopNestTransformationDirective(D);
 }
 
+void ASTStmtReader::VisitOMPCanonicalLoopSequenceTransformationDirective(
+    OMPCanonicalLoopSequenceTransformationDirective *D) {
+  VisitStmt(D);
+  // Field NumLoopsInSequence was read in ReadStmtFromStream.
+  Record.skipInts(1);
+  VisitOMPExecutableDirective(D);
+  D->setNumGeneratedLoops(Record.readUInt32());
+}
+
 void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) {
   VisitOMPCanonicalLoopNestTransformationDirective(D);
 }
 
+void ASTStmtReader::VisitOMPFuseDirective(OMPFuseDirective *D) {
+  VisitOMPCanonicalLoopSequenceTransformationDirective(D);
+}
+
 void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) {
   VisitOMPLoopDirective(D);
   D->setHasCancel(Record.readBool());
@@ -3609,6 +3622,13 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       S = OMPReverseDirective::CreateEmpty(Context, NumLoops);
       break;
     }
+    case STMT_OMP_FUSE_DIRECTIVE: {
+      unsigned NumLoopsInSequence = Record[ASTStmtReader::NumStmtFields];
+      unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];
+      S = OMPFuseDirective::CreateEmpty(Context, NumClauses,
+                                        NumLoopsInSequence);
+      break;
+    }
 
     case STMT_OMP_INTERCHANGE_DIRECTIVE: {
       unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 87e462f179ce6..a62ddf437c4fc 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -7865,6 +7865,14 @@ void OMPClauseWriter::VisitOMPPartialClause(OMPPartialClause *C) {
   Record.AddSourceLocation(C->getLParenLoc());
 }
 
+void OMPClauseWriter::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) {
+  Record.AddStmt(C->getFirst());
+  Record.AddStmt(C->getCount());
+  Record.AddSourceLocation(C->getLParenLoc());
+  Record.AddSourceLocation(C->getFirstLoc());
+  Record.AddSourceLocation(C->getCountLoc());
+}
+
 void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
   Record.AddStmt(C->getAllocator());
   Record.AddSourceLocation(C->getLParenLoc());
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 36b022cc9d371..87d8ab0710c9f 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2476,6 +2476,19 @@ void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) {
   Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE;
 }
 
+void ASTStmtWriter::VisitOMPCanonicalLoopSequenceTransformationDirective(
+    OMPCanonicalLoopSequenceTransformationDirective *D) {
+  VisitStmt(D);
+  Record.writeUInt32(D->getNumLoopsInSequence());
+  VisitOMPExecutableDirective(D);
+  Record.writeUInt32(D->getNumGeneratedLoops());
+}
+
+void ASTStmtWriter::VisitOMPFuseDirective(OMPFuseDirective *D) {
+  VisitOMPCanonicalLoopSequenceTransformationDirective(D);
+  Code = serialization::STMT_OMP_FUSE_DIRECTIVE;
+}
+
 void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) {
   VisitOMPLoopDirective(D);
   Record.writeBool(D->hasCancel());
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 785cdfa15bf04..4e472b7fc38b0 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1814,6 +1814,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
     case Stmt::OMPStripeDirectiveClass:
     case Stmt::OMPTileDirectiveClass:
     case Stmt::OMPInterchangeDirectiveClass:
+    case Stmt::OMPFuseDirectiveClass:
     case Stmt::OMPInteropDirectiveClass:
     case Stmt::OMPDispatchDirectiveClass:
     case Stmt::OMPMaskedDirectiveClass:
diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp
new file mode 100644
index 0000000000000..9d85bd1172948
--- /dev/null
+++ b/clang/test/OpenMP/fuse_ast_print.cpp
@@ -0,0 +1,400 @@
+// Check no warnings/errors
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+// Check AST and unparsing 
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-dump  %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT
+
+// Check same results after serialization round-trip 
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -emit-pch -o %t %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-print    %s | FileCheck %s --check-prefix=PRINT
+
+#ifndef HEADER
+#define HEADER 
+
+// placeholder for loop body code
+extern "C" void body(...);
+
+// PRINT-LABEL: void foo1(
+// DUMP-LABEL: FunctionDecl {{.*}} foo1
+void foo1() {
+    // PRINT: #pragma omp fuse
+    // DUMP:  OMPFuseDirective
+    #pragma omp fuse 
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: for (int i = 0; i < 10; i += 2)
+        // DUMP: ForStmt
+        for (int i = 0; i < 10; i += 2)
+            // PRINT: body(i)
+            // DUMP: CallExpr
+            body(i);
+        // PRINT: for (int j = 10; j > 0; --j)
+        // DUMP: ForStmt
+        for (int j = 10; j > 0; --j)
+            // PRINT: body(j)
+            // DUMP: CallExpr
+            body(j);
+        // PRINT: for (int k = 0; k <= 10; ++k)
+        // DUMP: ForStmt
+        for (int k = 0; k <= 10; ++k)
+            // PRINT: body(k)
+            // DUMP: CallExpr
+            body(k);
+
+    }
+
+}
+
+// PRINT-LABEL: void foo2(
+// DUMP-LABEL: FunctionDecl {{.*}} foo2
+void foo2() {
+    // PRINT: #pragma omp unroll partial(4)
+    // DUMP: OMPUnrollDirective
+    // DUMP-NEXT: OMPPartialClause
+    // DUMP-NEXT: ConstantExpr
+    // DUMP-NEXT: value: Int 4
+    // DUMP-NEXT: IntegerLiteral {{.*}} 4
+    #pragma omp unroll partial(4)
+    // PRINT: #pragma omp fuse
+    // DUMP-NEXT: OMPFuseDirective 
+    #pragma omp fuse 
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: for (int i = 0; i < 10; i += 2)
+        // DUMP: ForStmt
+        for (int i = 0; i < 10; i += 2)
+            // PRINT: body(i)
+            // DUMP: CallExpr
+            body(i);
+        // PRINT: for (int j = 10; j > 0; --j)
+        // DUMP: ForStmt
+        for (int j = 10; j > 0; --j)
+            // PRINT: body(j)
+            // DUMP: CallExpr
+            body(j);  
+    }    
+    
+}
+
+//PRINT-LABEL: void foo3(
+//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo3
+template<int Factor1, int Factor2> 
+void foo3() {
+    // PRINT:  #pragma omp fuse
+    // DUMP: OMPFuseDirective
+    #pragma omp fuse 
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: #pragma omp unroll partial(Factor1)
+        // DUMP: OMPUnrollDirective
+        #pragma omp unroll partial(Factor1)
+        // PRINT: for (int i = 0; i < 12; i += 1)
+        // DUMP: ForStmt
+        for (int i = 0; i < 12; i += 1)
+            // PRINT: body(i)
+            // DUMP: CallExpr
+            body(i);
+        // PRINT: #pragma omp unroll partial(Factor2)
+        // DUMP: OMPUnrollDirective
+        #pragma omp unroll partial(Factor2)
+        // PRINT: for (int k = 0; k <= 10; ++k)
+        // DUMP: ForStmt
+        for (int k = 0; k <= 10; ++k)
+            // PRINT: body(k)
+            // DUMP: CallExpr
+            body(k);
+
+    }
+}
+
+// Also test instantiating the template.
+void tfoo3() {
+    foo3<4,2>();
+}
+
+//PRINT-LABEL: void foo4(
+//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo4
+template<typename T, T Step> 
+void foo4(int start, int end) {
+    // PRINT:  #pragma omp fuse
+    // DUMP: OMPFuseDirective
+    #pragma omp fuse 
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: for (T i = start; i < end; i += Step)
+        // DUMP: ForStmt
+        for (T i = start; i < end; i += Step)
+            // PRINT: body(i)
+            // DUMP: CallExpr
+            body(i);
+
+        // PRINT: for (T j = end; j > start; j -= Step)
+        // DUMP: ForStmt 
+        for (T j = end; j > start; j -= Step) {
+            // PRINT: body(j)
+            // DUMP: CallExpr
+            body(j);
+        }
+
+    }
+}
+
+// Also test instantiating the template.
+void tfoo4() {
+    foo4<int, 4>(0, 64);
+}
+
+
+
+// PRINT-LABEL: void foo5(
+// DUMP-LABEL: FunctionDecl {{.*}} foo5
+void foo5() {
+    double arr[128], arr2[128];
+    // PRINT: #pragma omp fuse
+    // DUMP:  OMPFuseDirective
+    #pragma omp fuse 
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT-NEXT: for (auto &&a : arr)
+        // DUMP-NEXT: CXXForRangeStmt
+        for (auto &&a: arr)
+            // PRINT: body(a)
+            // DUMP: CallExpr
+            body(a);
+        // PRINT: for (double v = 42; auto &&b : arr)
+        // DUMP: CXXForRangeStmt
+        for (double v = 42; auto &&b: arr)
+            // PRINT: body(b, v);
+            // DUMP: CallExpr
+            body(b, v);
+        // PRINT: for (auto &&c : arr2)
+        // DUMP: CXXForRangeStmt
+        for (auto &&c: arr2)
+            // PRINT: body(c)
+            // DUMP: CallExpr
+            body(c);
+
+    }
+
+}
+
+// PRINT-LABEL: void foo6(
+// DUMP-LABEL: FunctionDecl {{.*}} foo6
+void foo6() {
+    // PRINT: #pragma omp fuse
+    // DUMP: OMPFuseDirective
+    #pragma omp fuse 
+    // PRINT: {
+    // DUMP: CompoundStmt
+    {
+        // PRINT: #pragma omp fuse
+        // DUMP: OMPFuseDirective
+        #pragma omp fuse 
+        // PRINT: {
+        // DUMP: CompoundStmt
+        {
+            // PRINT: for (int i = 0; i <= 10; ++i)
+            // DUMP: ForStmt
+            for (int i = 0; i <= 10; ++i)
+                body(i);
+            // PRINT: for (int j = 0; j < 100; ++j)
+            // DUMP: ForStmt
+            for(int j = 0; j < 100; ++j)
+                body(j);
+        }
+        // PRINT: #pragma omp unroll partial(4)
+        // DUMP: OMPUnrollDirective
+        #pragma omp unroll partial(4)
+        // PRINT: for (int k = 0; k < 250; ++k)
+        // DUMP: ForStmt
+        for (int k = 0; k < 250; ++k) 
+            body(k);
+    }
+}
+
+// PRINT-LABEL: void foo7(
+// DUMP-LABEL: FunctionDecl {{.*}} foo7
+void foo7() {
+    // PRINT: #pragma omp fuse
+    // DUMP:  OMPFuseDirective
+    #pragma omp fuse 
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: {
+        // DUMP: CompoundStmt   
+        {
+            // PRINT: {
+            // DUMP: CompoundStmt   
+            {
+                // PRINT: for (int i = 0; i < 10; i += 2)
+                // DUMP: ForStmt
+                for (int i = 0; i < 10; i += 2)
+                    // PRINT: body(i)
+                    // DUMP: CallExpr
+                    body(i);
+                // PRINT: for (int j = 10; j > 0; --j)
+                // DUMP: ForStmt
+                for (int j = 10; j > 0; --j)
+                    // PRINT: body(j)
+                    // DUMP: CallExpr
+                    body(j);
+            }
+        }
+        // PRINT: {
+        // DUMP: CompoundStmt   
+        {
+            // PRINT: {
+            // DUMP: CompoundStmt   
+            {
+                // PRINT: {
+                // DUMP: CompoundStmt   
+                {
+                    // PRINT: for (int k = 0; k <= 10; ++k)
+                    // DUMP: ForStmt
+                    for (int k = 0; k <= 10; ++k)
+                        // PRINT: body(k)
+                        // DUMP: CallExpr
+                        body(k);
+                }
+            }
+        }
+    }
+
+}
+
+// PRINT-LABEL: void foo8(
+// DUMP-LABEL: FunctionDecl {{.*}} foo8
+void foo8() {
+    // PRINT: #pragma omp fuse looprange(2,2)
+    // DUMP:  OMPFuseDirective
+    // DUMP: OMPLooprangeClause
+    #pragma omp fuse looprange(2,2)
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: for (int i = 0; i < 10; i += 2)
+        // DUMP: ForStmt
+        for (int i = 0; i < 10; i += 2)
+            // PRINT: body(i)
+            // DUMP: CallExpr
+            body(i);
+        // PRINT: for (int j = 10; j > 0; --j)
+        // DUMP: ForStmt
+        for (int j = 10; j > 0; --j)
+            // PRINT: body(j)
+            // DUMP: CallExpr
+            body(j);
+        // PRINT: for (int k = 0; k <= 10; ++k)
+        // DUMP: ForStmt
+        for (int k = 0; k <= 10; ++k)
+            // PRINT: body(k)
+            // DUMP: CallExpr
+            body(k);
+
+    }
+
+}
+
+//PRINT-LABEL: void foo9(
+//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo9
+//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} F
+//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} C
+template<int F, int C> 
+void foo9() {
+    // PRINT:  #pragma omp fuse looprange(F,C)
+    // DUMP: OMPFuseDirective
+    // DUMP: OMPLooprangeClause
+    #pragma omp fuse looprange(F,C)
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: for (int i = 0; i < 10; i += 2)
+        // DUMP: ForStmt
+        for (int i = 0; i < 10; i += 2)
+            // PRINT: body(i)
+            // DUMP: CallExpr
+            body(i);
+        // PRINT: for (int j = 10; j > 0; --j)
+        // DUMP: ForStmt
+        for (int j = 10; j > 0; --j)
+            // PRINT: body(j)
+            // DUMP: CallExpr
+            body(j);
+
+    }
+}
+
+// Also test instantiating the template.
+void tfoo9() {
+    foo9<1, 2>();
+}
+
+// PRINT-LABEL: void foo10(
+// DUMP-LABEL: FunctionDecl {{.*}} foo10
+void foo10() {
+    // PRINT: #pragma omp fuse looprange(2,2)
+    // DUMP:  OMPFuseDirective
+    // DUMP: OMPLooprangeClause
+    #pragma omp fuse looprange(2,2)
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: for (int i = 0; i < 10; i += 2)
+        // DUMP: ForStmt
+        for (int i = 0; i < 10; i += 2)
+            // PRINT: body(i)
+            // DUMP: CallExpr
+            body(i);
+        // PRINT: for (int ii = 0; ii < 10; ii += 2)
+        // DUMP: ForStmt
+        for (int ii = 0; ii < 10; ii += 2)
+            // PRINT: body(ii)
+            // DUMP: CallExpr
+            body(ii);
+        // PRINT: #pragma omp fuse looprange(2,2)
+        // DUMP:  OMPFuseDirective
+        // DUMP: OMPLooprangeClause
+        #pragma omp fuse looprange(2,2)
+        {
+            // PRINT: for (int j = 10; j > 0; --j)
+            // DUMP: ForStmt
+            for (int j = 10; j > 0; --j)
+                // PRINT: body(j)
+                // DUMP: CallExpr
+                body(j);
+            // PRINT: for (int jj = 10; jj > 0; --jj)
+            // DUMP: ForStmt
+            for (int jj = 10; jj > 0; --jj)
+                // PRINT: body(jj)
+                // DUMP: CallExpr
+                body(jj);
+            // PRINT: for (int k = 0; k <= 10; ++k)
+            // DUMP: ForStmt
+            for (int k = 0; k <= 10; ++k)
+                // PRINT: body(k)
+                // DUMP: CallExpr
+                body(k);
+            // PRINT: for (int kk = 0; kk <= 10; ++kk)
+            // DUMP: ForStmt
+            for (int kk = 0; kk <= 10; ++kk)
+                // PRINT: body(kk)
+                // DUMP: CallExpr
+                body(kk);
+        }
+    }
+
+}
+
+
+
+
+#endif
\ No newline at end of file
diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp
new file mode 100644
index 0000000000000..742c280ed0172
--- /dev/null
+++ b/clang/test/OpenMP/fuse_codegen.cpp
@@ -0,0 +1,2328 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5
+// expected-no-diagnostics
+
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2
+
+#ifndef HEADER
+#define HEADER
+
+//placeholder for loop body code.
+extern "C" void body(...) {}
+
+extern "C" void foo1(int start1, int end1, int step1, int start2, int end2, int step2) {
+    int i,j;
+    #pragma omp fuse
+    {
+        for(i = start1; i < end1; i += step1) body(i);
+        for(j = start2; j < end2; j += step2) body(j);
+    }
+
+}
+
+template <typename T>
+void foo2(T start, T end, T step){
+    T i,j,k;
+    #pragma omp fuse
+    {
+        for(i = start; i < end; i += step) body(i);
+        for(j = end; j > start; j -= step) body(j);
+        for(k = start+step; k < end+step; k += step) body(k);
+    }
+}
+
+extern "C" void tfoo2() {
+    foo2<int>(0, 64, 4);
+}
+
+extern "C" void foo3() {
+    double arr[256];
+    #pragma omp fuse
+    {
+        #pragma omp fuse
+        {
+            for(int i = 0; i < 128; ++i) body(i);
+            for(int j = 0; j < 256; j+=2) body(j);
+        }
+        for(int c = 42; auto &&v: arr) body(c,v);
+        for(int cc = 37; auto &&vv: arr) body(cc, vv);
+    }
+}
+
+extern "C" void foo4() {
+    double arr[256];
+
+    #pragma omp fuse looprange(2,2)
+    {
+        for(int i = 0; i < 128; ++i) body(i);
+        for(int j = 0; j < 256; j+=2) body(j);
+        for(int k = 0; k < 64; ++k) body(k);
+        for(int c = 42; auto &&v: arr) body(c,v);
+    }
+}
+
+// This exemplifies the usage of loop transformations that generate
+// more than top level canonical loop nests (e.g split, loopranged fuse...)
+extern "C" void foo5() {
+    double arr[256];
+    #pragma omp fuse looprange(2,2)
+    {
+        #pragma omp fuse looprange(2,2)
+        {
+            for(int i = 0; i < 128; ++i) body(i);
+            for(int j = 0; j < 256; j+=2) body(j);
+            for(int k = 0; k < 512; ++k) body(k);
+        }
+        for(int c = 42; auto &&v: arr) body(c,v);
+        for(int cc = 37; auto &&vv: arr) body(cc, vv);
+    }
+}
+
+
+#endif
+// CHECK1-LABEL: define dso_local void @body(
+// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] {
+// CHECK1-NEXT:  [[ENTRY:.*:]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo1(
+// CHECK1-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  [[ENTRY:.*:]]
+// CHECK1-NEXT:    [[START1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[END1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[STEP1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[START2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[END2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[STEP2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 [[START1]], ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[END1]], ptr [[END1_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[START2]], ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[END2]], ptr [[END2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT:    [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK1-NEXT:    [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT:    [[ADD5:%.*]] = add i32 [[TMP8]], 1
+// CHECK1-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP9]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT:    [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
+// CHECK1-NEXT:    [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK1-NEXT:    store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT:    [[ADD15:%.*]] = add i32 [[TMP17]], 1
+// CHECK1-NEXT:    store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]]
+// CHECK1-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1:       [[COND_TRUE]]:
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END:.*]]
+// CHECK1:       [[COND_FALSE]]:
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END]]
+// CHECK1:       [[COND_END]]:
+// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ]
+// CHECK1-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK1:       [[FOR_COND]]:
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]]
+// CHECK1-NEXT:    br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1:       [[FOR_BODY]]:
+// CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT:    br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1:       [[IF_THEN]]:
+// CHECK1-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]]
+// CHECK1-NEXT:    [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT:    [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]]
+// CHECK1-NEXT:    store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP33]])
+// CHECK1-NEXT:    br label %[[IF_END]]
+// CHECK1:       [[IF_END]]:
+// CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT:    br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
+// CHECK1:       [[IF_THEN22]]:
+// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]]
+// CHECK1-NEXT:    [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]]
+// CHECK1-NEXT:    store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]]
+// CHECK1-NEXT:    [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]]
+// CHECK1-NEXT:    store i32 [[ADD26]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP42]])
+// CHECK1-NEXT:    br label %[[IF_END27]]
+// CHECK1:       [[IF_END27]]:
+// CHECK1-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK1:       [[FOR_INC]]:
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add i32 [[TMP43]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1:       [[FOR_END]]:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @tfoo2(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT:  [[ENTRY:.*:]]
+// CHECK1-NEXT:    call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4)
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_(
+// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat {
+// CHECK1-NEXT:  [[ENTRY:.*:]]
+// CHECK1-NEXT:    [[START_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[STEP_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 [[START]], ptr [[START_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[END]], ptr [[END_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[STEP]], ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT:    [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK1-NEXT:    [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT:    [[ADD5:%.*]] = add i32 [[TMP8]], 1
+// CHECK1-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP9]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT:    [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
+// CHECK1-NEXT:    [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK1-NEXT:    store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT:    [[ADD15:%.*]] = add i32 [[TMP17]], 1
+// CHECK1-NEXT:    store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK1-NEXT:    store i32 [[ADD16]], ptr [[K]], align 4
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT:    store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK1-NEXT:    store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK1-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT:    [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT:    [[SUB24:%.*]] = sub i32 [[SUB23]], 1
+// CHECK1-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT:    [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]]
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT:    [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]]
+// CHECK1-NEXT:    [[SUB27:%.*]] = sub i32 [[DIV26]], 1
+// CHECK1-NEXT:    store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB2]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST2]], align 4
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT:    [[ADD28:%.*]] = add i32 [[TMP29]], 1
+// CHECK1-NEXT:    store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1:       [[COND_TRUE]]:
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END:.*]]
+// CHECK1:       [[COND_FALSE]]:
+// CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END]]
+// CHECK1:       [[COND_END]]:
+// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ]
+// CHECK1-NEXT:    store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT:    [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]]
+// CHECK1-NEXT:    br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
+// CHECK1:       [[COND_TRUE30]]:
+// CHECK1-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT:    br label %[[COND_END32:.*]]
+// CHECK1:       [[COND_FALSE31]]:
+// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT:    br label %[[COND_END32]]
+// CHECK1:       [[COND_END32]]:
+// CHECK1-NEXT:    [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ]
+// CHECK1-NEXT:    store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK1:       [[FOR_COND]]:
+// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT:    br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1:       [[FOR_BODY]]:
+// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]]
+// CHECK1-NEXT:    br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1:       [[IF_THEN]]:
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]]
+// CHECK1-NEXT:    [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK1-NEXT:    [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]]
+// CHECK1-NEXT:    store i32 [[ADD38]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP49:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP49]])
+// CHECK1-NEXT:    br label %[[IF_END]]
+// CHECK1:       [[IF_END]]:
+// CHECK1-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]]
+// CHECK1-NEXT:    br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
+// CHECK1:       [[IF_THEN40]]:
+// CHECK1-NEXT:    [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT:    [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]]
+// CHECK1-NEXT:    store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT:    [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]]
+// CHECK1-NEXT:    store i32 [[SUB44]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP58:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP58]])
+// CHECK1-NEXT:    br label %[[IF_END45]]
+// CHECK1:       [[IF_END45]]:
+// CHECK1-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT:    [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]]
+// CHECK1-NEXT:    br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK1:       [[IF_THEN47]]:
+// CHECK1-NEXT:    [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK1-NEXT:    [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK1-NEXT:    [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]]
+// CHECK1-NEXT:    [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]]
+// CHECK1-NEXT:    store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
+// CHECK1-NEXT:    [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT:    [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK1-NEXT:    [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT:    [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK1-NEXT:    [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]]
+// CHECK1-NEXT:    store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK1-NEXT:    [[TMP67:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP67]])
+// CHECK1-NEXT:    br label %[[IF_END52]]
+// CHECK1:       [[IF_END52]]:
+// CHECK1-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK1:       [[FOR_INC]]:
+// CHECK1-NEXT:    [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add i32 [[TMP68]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1:       [[FOR_END]]:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo3(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT:  [[ENTRY:.*:]]
+// CHECK1-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[__RANGE221:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__END222:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__BEGIN225:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_LB2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_ST2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_NI2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[VV:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    store i32 128, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1:       [[COND_TRUE]]:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END:.*]]
+// CHECK1:       [[COND_FALSE]]:
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END]]
+// CHECK1:       [[COND_END]]:
+// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT:    [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK1-NEXT:    store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT:    store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT:    store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK1-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK1-NEXT:    store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT:    store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK1-NEXT:    [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK1-NEXT:    [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT:    [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK1-NEXT:    [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK1-NEXT:    [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK1-NEXT:    store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT:    store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT:    [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK1-NEXT:    store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    store i32 37, ptr [[CC]], align 4
+// CHECK1-NEXT:    store ptr [[ARR]], ptr [[__RANGE221]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0
+// CHECK1-NEXT:    [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256
+// CHECK1-NEXT:    store ptr [[ADD_PTR24]], ptr [[__END222]], align 8
+// CHECK1-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8
+// CHECK1-NEXT:    store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT:    [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]]
+// CHECK1-NEXT:    [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8
+// CHECK1-NEXT:    [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1
+// CHECK1-NEXT:    [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1
+// CHECK1-NEXT:    [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1
+// CHECK1-NEXT:    [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1
+// CHECK1-NEXT:    store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_LB2]], align 8
+// CHECK1-NEXT:    store i64 1, ptr [[DOTOMP_ST2]], align 8
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK1-NEXT:    [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1
+// CHECK1-NEXT:    store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT:    store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]]
+// CHECK1-NEXT:    br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]]
+// CHECK1:       [[COND_TRUE42]]:
+// CHECK1-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT:    br label %[[COND_END44:.*]]
+// CHECK1:       [[COND_FALSE43]]:
+// CHECK1-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    br label %[[COND_END44]]
+// CHECK1:       [[COND_END44]]:
+// CHECK1-NEXT:    [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ]
+// CHECK1-NEXT:    store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT:    [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT:    br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]]
+// CHECK1:       [[COND_TRUE48]]:
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT:    br label %[[COND_END50:.*]]
+// CHECK1:       [[COND_FALSE49]]:
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT:    br label %[[COND_END50]]
+// CHECK1:       [[COND_END50]]:
+// CHECK1-NEXT:    [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ]
+// CHECK1-NEXT:    store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK1:       [[FOR_COND]]:
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK1-NEXT:    [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT:    br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1:       [[FOR_BODY]]:
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT:    [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]]
+// CHECK1-NEXT:    br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]]
+// CHECK1:       [[IF_THEN]]:
+// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT:    [[CONV55:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT:    [[CONV56:%.*]] = sext i32 [[TMP36]] to i64
+// CHECK1-NEXT:    [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]]
+// CHECK1-NEXT:    [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]]
+// CHECK1-NEXT:    [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32
+// CHECK1-NEXT:    store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT:    [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1
+// CHECK1-NEXT:    [[ADD60:%.*]] = add nsw i32 0, [[MUL59]]
+// CHECK1-NEXT:    store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT:    br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]]
+// CHECK1:       [[IF_THEN62]]:
+// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT:    [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]]
+// CHECK1-NEXT:    store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1
+// CHECK1-NEXT:    [[ADD66:%.*]] = add nsw i32 0, [[MUL65]]
+// CHECK1-NEXT:    store i32 [[ADD66]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP45]])
+// CHECK1-NEXT:    br label %[[IF_END]]
+// CHECK1:       [[IF_END]]:
+// CHECK1-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
+// CHECK1-NEXT:    br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]]
+// CHECK1:       [[IF_THEN68]]:
+// CHECK1-NEXT:    [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]]
+// CHECK1-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]]
+// CHECK1-NEXT:    store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2
+// CHECK1-NEXT:    [[ADD72:%.*]] = add nsw i32 0, [[MUL71]]
+// CHECK1-NEXT:    store i32 [[ADD72]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP52:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP52]])
+// CHECK1-NEXT:    br label %[[IF_END73]]
+// CHECK1:       [[IF_END73]]:
+// CHECK1-NEXT:    br label %[[IF_END74]]
+// CHECK1:       [[IF_END74]]:
+// CHECK1-NEXT:    [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT:    br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]]
+// CHECK1:       [[IF_THEN76]]:
+// CHECK1-NEXT:    [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT:    [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT:    [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT:    [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]]
+// CHECK1-NEXT:    store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT:    [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT:    [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT:    [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1
+// CHECK1-NEXT:    [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]]
+// CHECK1-NEXT:    store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    store ptr [[TMP60]], ptr [[V]], align 8
+// CHECK1-NEXT:    [[TMP61:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT:    [[TMP62:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT:    [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]])
+// CHECK1-NEXT:    br label %[[IF_END81]]
+// CHECK1:       [[IF_END81]]:
+// CHECK1-NEXT:    [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT:    [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]]
+// CHECK1-NEXT:    br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]]
+// CHECK1:       [[IF_THEN83]]:
+// CHECK1-NEXT:    [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK1-NEXT:    [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK1-NEXT:    [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]]
+// CHECK1-NEXT:    [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]]
+// CHECK1-NEXT:    store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT:    [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT:    [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT:    [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1
+// CHECK1-NEXT:    [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]]
+// CHECK1-NEXT:    store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT:    [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT:    store ptr [[TMP71]], ptr [[VV]], align 8
+// CHECK1-NEXT:    [[TMP72:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT:    [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT:    [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]])
+// CHECK1-NEXT:    br label %[[IF_END88]]
+// CHECK1:       [[IF_END88]]:
+// CHECK1-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK1:       [[FOR_INC]]:
+// CHECK1-NEXT:    [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i64 [[TMP75]], 1
+// CHECK1-NEXT:    store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK1:       [[FOR_END]]:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo4(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT:  [[ENTRY:.*:]]
+// CHECK1-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[K]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    store i32 64, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1:       [[COND_TRUE]]:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END:.*]]
+// CHECK1:       [[COND_FALSE]]:
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END]]
+// CHECK1:       [[COND_END]]:
+// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK1:       [[FOR_COND]]:
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128
+// CHECK1-NEXT:    br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1:       [[FOR_BODY]]:
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP6]])
+// CHECK1-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK1:       [[FOR_INC]]:
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[I]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK1:       [[FOR_END]]:
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND2:.*]]
+// CHECK1:       [[FOR_COND2]]:
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT:    br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]]
+// CHECK1:       [[FOR_BODY4]]:
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// CHECK1-NEXT:    br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1:       [[IF_THEN]]:
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2
+// CHECK1-NEXT:    [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK1-NEXT:    store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP16]])
+// CHECK1-NEXT:    br label %[[IF_END]]
+// CHECK1:       [[IF_END]]:
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+// CHECK1-NEXT:    br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]]
+// CHECK1:       [[IF_THEN9]]:
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]]
+// CHECK1-NEXT:    store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1
+// CHECK1-NEXT:    [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK1-NEXT:    store i32 [[ADD13]], ptr [[K]], align 4
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP23]])
+// CHECK1-NEXT:    br label %[[IF_END14]]
+// CHECK1:       [[IF_END14]]:
+// CHECK1-NEXT:    br label %[[FOR_INC15:.*]]
+// CHECK1:       [[FOR_INC15]]:
+// CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[INC16:%.*]] = add nsw i32 [[TMP24]], 1
+// CHECK1-NEXT:    store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND2]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK1:       [[FOR_END17]]:
+// CHECK1-NEXT:    store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT:    store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0
+// CHECK1-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256
+// CHECK1-NEXT:    store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND19:.*]]
+// CHECK1:       [[FOR_COND19]]:
+// CHECK1-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT:    [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]]
+// CHECK1-NEXT:    br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]]
+// CHECK1:       [[FOR_BODY21]]:
+// CHECK1-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    store ptr [[TMP29]], ptr [[V]], align 8
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT:    [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP30]], double noundef [[TMP32]])
+// CHECK1-NEXT:    br label %[[FOR_INC22:.*]]
+// CHECK1:       [[FOR_INC22]]:
+// CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1
+// CHECK1-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND19]]
+// CHECK1:       [[FOR_END23]]:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo5(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT:  [[ENTRY:.*:]]
+// CHECK1-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[__RANGE264:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__BEGIN265:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__END267:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[VV:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[K]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    store i32 512, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1:       [[COND_TRUE]]:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END:.*]]
+// CHECK1:       [[COND_FALSE]]:
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END]]
+// CHECK1:       [[COND_END]]:
+// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT:    [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK1-NEXT:    store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT:    store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT:    store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK1-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK1-NEXT:    store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT:    store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK1-NEXT:    [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK1-NEXT:    [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT:    [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK1-NEXT:    [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK1-NEXT:    [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK1-NEXT:    store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT:    store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT:    [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK1-NEXT:    store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT:    store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
+// CHECK1-NEXT:    br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]]
+// CHECK1:       [[COND_TRUE24]]:
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT:    br label %[[COND_END26:.*]]
+// CHECK1:       [[COND_FALSE25]]:
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    br label %[[COND_END26]]
+// CHECK1:       [[COND_END26]]:
+// CHECK1-NEXT:    [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ]
+// CHECK1-NEXT:    store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK1:       [[FOR_COND]]:
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128
+// CHECK1-NEXT:    br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1:       [[FOR_BODY]]:
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP21]])
+// CHECK1-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK1:       [[FOR_INC]]:
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP22]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[I]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK1:       [[FOR_END]]:
+// CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND30:.*]]
+// CHECK1:       [[FOR_COND30]]:
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK1-NEXT:    [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]]
+// CHECK1-NEXT:    br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]]
+// CHECK1:       [[FOR_BODY32]]:
+// CHECK1-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT:    [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT:    br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]]
+// CHECK1:       [[IF_THEN]]:
+// CHECK1-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT:    [[CONV34:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT:    [[CONV35:%.*]] = sext i32 [[TMP28]] to i64
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]]
+// CHECK1-NEXT:    [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]]
+// CHECK1-NEXT:    [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
+// CHECK1-NEXT:    store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT:    [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1
+// CHECK1-NEXT:    [[ADD39:%.*]] = add nsw i32 0, [[MUL38]]
+// CHECK1-NEXT:    store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT:    br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]]
+// CHECK1:       [[IF_THEN41]]:
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]]
+// CHECK1-NEXT:    store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2
+// CHECK1-NEXT:    [[ADD45:%.*]] = add nsw i32 0, [[MUL44]]
+// CHECK1-NEXT:    store i32 [[ADD45]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP37:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP37]])
+// CHECK1-NEXT:    br label %[[IF_END]]
+// CHECK1:       [[IF_END]]:
+// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]]
+// CHECK1-NEXT:    br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK1:       [[IF_THEN47]]:
+// CHECK1-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]]
+// CHECK1-NEXT:    [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]]
+// CHECK1-NEXT:    store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1
+// CHECK1-NEXT:    [[ADD51:%.*]] = add nsw i32 0, [[MUL50]]
+// CHECK1-NEXT:    store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP44]])
+// CHECK1-NEXT:    br label %[[IF_END52]]
+// CHECK1:       [[IF_END52]]:
+// CHECK1-NEXT:    br label %[[IF_END53]]
+// CHECK1:       [[IF_END53]]:
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]]
+// CHECK1-NEXT:    br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]]
+// CHECK1:       [[IF_THEN55]]:
+// CHECK1-NEXT:    [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT:    [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT:    [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]]
+// CHECK1-NEXT:    [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]]
+// CHECK1-NEXT:    store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT:    [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT:    [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT:    [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1
+// CHECK1-NEXT:    [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]]
+// CHECK1-NEXT:    store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    store ptr [[TMP52]], ptr [[V]], align 8
+// CHECK1-NEXT:    [[TMP53:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT:    [[TMP54:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT:    [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]])
+// CHECK1-NEXT:    br label %[[IF_END60]]
+// CHECK1:       [[IF_END60]]:
+// CHECK1-NEXT:    br label %[[FOR_INC61:.*]]
+// CHECK1:       [[FOR_INC61]]:
+// CHECK1-NEXT:    [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    [[INC62:%.*]] = add nsw i64 [[TMP56]], 1
+// CHECK1-NEXT:    store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND30]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK1:       [[FOR_END63]]:
+// CHECK1-NEXT:    store i32 37, ptr [[CC]], align 4
+// CHECK1-NEXT:    store ptr [[ARR]], ptr [[__RANGE264]], align 8
+// CHECK1-NEXT:    [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT:    [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0
+// CHECK1-NEXT:    [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256
+// CHECK1-NEXT:    store ptr [[ADD_PTR69]], ptr [[__END267]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND70:.*]]
+// CHECK1:       [[FOR_COND70]]:
+// CHECK1-NEXT:    [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT:    [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8
+// CHECK1-NEXT:    [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]]
+// CHECK1-NEXT:    br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]]
+// CHECK1:       [[FOR_BODY72]]:
+// CHECK1-NEXT:    [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT:    store ptr [[TMP61]], ptr [[VV]], align 8
+// CHECK1-NEXT:    [[TMP62:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT:    [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT:    [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]])
+// CHECK1-NEXT:    br label %[[FOR_INC73:.*]]
+// CHECK1:       [[FOR_INC73]]:
+// CHECK1-NEXT:    [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1
+// CHECK1-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND70]]
+// CHECK1:       [[FOR_END74]]:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @body(
+// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] {
+// CHECK2-NEXT:  [[ENTRY:.*:]]
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo1(
+// CHECK2-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT:  [[ENTRY:.*:]]
+// CHECK2-NEXT:    [[START1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[END1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[STEP1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[START2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[END2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[STEP2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[START1]], ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[END1]], ptr [[END1_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[START2]], ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[END2]], ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK2-NEXT:    [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK2-NEXT:    [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT:    [[ADD5:%.*]] = add i32 [[TMP8]], 1
+// CHECK2-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP9]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT:    [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
+// CHECK2-NEXT:    [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK2-NEXT:    store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT:    [[ADD15:%.*]] = add i32 [[TMP17]], 1
+// CHECK2-NEXT:    store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]]
+// CHECK2-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2:       [[COND_TRUE]]:
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END:.*]]
+// CHECK2:       [[COND_FALSE]]:
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END]]
+// CHECK2:       [[COND_END]]:
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ]
+// CHECK2-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK2:       [[FOR_COND]]:
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT:    br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2:       [[FOR_BODY]]:
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT:    br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2:       [[IF_THEN]]:
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]]
+// CHECK2-NEXT:    [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT:    [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]]
+// CHECK2-NEXT:    store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP33]])
+// CHECK2-NEXT:    br label %[[IF_END]]
+// CHECK2:       [[IF_END]]:
+// CHECK2-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT:    br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
+// CHECK2:       [[IF_THEN22]]:
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]]
+// CHECK2-NEXT:    [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]]
+// CHECK2-NEXT:    store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]]
+// CHECK2-NEXT:    [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]]
+// CHECK2-NEXT:    store i32 [[ADD26]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP42]])
+// CHECK2-NEXT:    br label %[[IF_END27]]
+// CHECK2:       [[IF_END27]]:
+// CHECK2-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK2:       [[FOR_INC]]:
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add i32 [[TMP43]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK2:       [[FOR_END]]:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo3(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT:  [[ENTRY:.*:]]
+// CHECK2-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[__RANGE221:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__END222:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__BEGIN225:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_LB2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_ST2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_NI2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_IV2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[VV:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    store i32 128, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2:       [[COND_TRUE]]:
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END:.*]]
+// CHECK2:       [[COND_FALSE]]:
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END]]
+// CHECK2:       [[COND_END]]:
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT:    [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK2-NEXT:    store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT:    store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT:    store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK2-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK2-NEXT:    store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT:    store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK2-NEXT:    [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK2-NEXT:    [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT:    [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK2-NEXT:    [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK2-NEXT:    [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK2-NEXT:    store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT:    store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT:    [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK2-NEXT:    store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    store i32 37, ptr [[CC]], align 4
+// CHECK2-NEXT:    store ptr [[ARR]], ptr [[__RANGE221]], align 8
+// CHECK2-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0
+// CHECK2-NEXT:    [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256
+// CHECK2-NEXT:    store ptr [[ADD_PTR24]], ptr [[__END222]], align 8
+// CHECK2-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8
+// CHECK2-NEXT:    store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT:    [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]]
+// CHECK2-NEXT:    [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8
+// CHECK2-NEXT:    [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1
+// CHECK2-NEXT:    [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1
+// CHECK2-NEXT:    [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1
+// CHECK2-NEXT:    [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1
+// CHECK2-NEXT:    store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT:    store i64 1, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK2-NEXT:    [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1
+// CHECK2-NEXT:    store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT:    store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT:    br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]]
+// CHECK2:       [[COND_TRUE42]]:
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT:    br label %[[COND_END44:.*]]
+// CHECK2:       [[COND_FALSE43]]:
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    br label %[[COND_END44]]
+// CHECK2:       [[COND_END44]]:
+// CHECK2-NEXT:    [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ]
+// CHECK2-NEXT:    store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT:    [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT:    br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]]
+// CHECK2:       [[COND_TRUE48]]:
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT:    br label %[[COND_END50:.*]]
+// CHECK2:       [[COND_FALSE49]]:
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT:    br label %[[COND_END50]]
+// CHECK2:       [[COND_END50]]:
+// CHECK2-NEXT:    [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ]
+// CHECK2-NEXT:    store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK2:       [[FOR_COND]]:
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK2-NEXT:    [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT:    br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2:       [[FOR_BODY]]:
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT:    [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]]
+// CHECK2-NEXT:    br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]]
+// CHECK2:       [[IF_THEN]]:
+// CHECK2-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT:    [[CONV55:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT:    [[CONV56:%.*]] = sext i32 [[TMP36]] to i64
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]]
+// CHECK2-NEXT:    [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]]
+// CHECK2-NEXT:    [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32
+// CHECK2-NEXT:    store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT:    [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1
+// CHECK2-NEXT:    [[ADD60:%.*]] = add nsw i32 0, [[MUL59]]
+// CHECK2-NEXT:    store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]]
+// CHECK2-NEXT:    br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]]
+// CHECK2:       [[IF_THEN62]]:
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT:    [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]]
+// CHECK2-NEXT:    store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1
+// CHECK2-NEXT:    [[ADD66:%.*]] = add nsw i32 0, [[MUL65]]
+// CHECK2-NEXT:    store i32 [[ADD66]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP45]])
+// CHECK2-NEXT:    br label %[[IF_END]]
+// CHECK2:       [[IF_END]]:
+// CHECK2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
+// CHECK2-NEXT:    br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]]
+// CHECK2:       [[IF_THEN68]]:
+// CHECK2-NEXT:    [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]]
+// CHECK2-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]]
+// CHECK2-NEXT:    store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2
+// CHECK2-NEXT:    [[ADD72:%.*]] = add nsw i32 0, [[MUL71]]
+// CHECK2-NEXT:    store i32 [[ADD72]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP52:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP52]])
+// CHECK2-NEXT:    br label %[[IF_END73]]
+// CHECK2:       [[IF_END73]]:
+// CHECK2-NEXT:    br label %[[IF_END74]]
+// CHECK2:       [[IF_END74]]:
+// CHECK2-NEXT:    [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT:    br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]]
+// CHECK2:       [[IF_THEN76]]:
+// CHECK2-NEXT:    [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT:    [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT:    [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT:    [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]]
+// CHECK2-NEXT:    store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT:    [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT:    [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT:    [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1
+// CHECK2-NEXT:    [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]]
+// CHECK2-NEXT:    store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    store ptr [[TMP60]], ptr [[V]], align 8
+// CHECK2-NEXT:    [[TMP61:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT:    [[TMP62:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT:    [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]])
+// CHECK2-NEXT:    br label %[[IF_END81]]
+// CHECK2:       [[IF_END81]]:
+// CHECK2-NEXT:    [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT:    [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]]
+// CHECK2-NEXT:    br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]]
+// CHECK2:       [[IF_THEN83]]:
+// CHECK2-NEXT:    [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT:    [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT:    [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]]
+// CHECK2-NEXT:    [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]]
+// CHECK2-NEXT:    store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT:    [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT:    [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT:    [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1
+// CHECK2-NEXT:    [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]]
+// CHECK2-NEXT:    store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT:    [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT:    store ptr [[TMP71]], ptr [[VV]], align 8
+// CHECK2-NEXT:    [[TMP72:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT:    [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT:    [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]])
+// CHECK2-NEXT:    br label %[[IF_END88]]
+// CHECK2:       [[IF_END88]]:
+// CHECK2-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK2:       [[FOR_INC]]:
+// CHECK2-NEXT:    [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i64 [[TMP75]], 1
+// CHECK2-NEXT:    store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK2:       [[FOR_END]]:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo4(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT:  [[ENTRY:.*:]]
+// CHECK2-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[K]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    store i32 64, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2:       [[COND_TRUE]]:
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END:.*]]
+// CHECK2:       [[COND_FALSE]]:
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END]]
+// CHECK2:       [[COND_END]]:
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK2:       [[FOR_COND]]:
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128
+// CHECK2-NEXT:    br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2:       [[FOR_BODY]]:
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP6]])
+// CHECK2-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK2:       [[FOR_INC]]:
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[I]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK2:       [[FOR_END]]:
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND2:.*]]
+// CHECK2:       [[FOR_COND2]]:
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
+// CHECK2-NEXT:    br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]]
+// CHECK2:       [[FOR_BODY4]]:
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// CHECK2-NEXT:    br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2:       [[IF_THEN]]:
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2
+// CHECK2-NEXT:    [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK2-NEXT:    store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP16]])
+// CHECK2-NEXT:    br label %[[IF_END]]
+// CHECK2:       [[IF_END]]:
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+// CHECK2-NEXT:    br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]]
+// CHECK2:       [[IF_THEN9]]:
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
+// CHECK2-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]]
+// CHECK2-NEXT:    store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1
+// CHECK2-NEXT:    [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK2-NEXT:    store i32 [[ADD13]], ptr [[K]], align 4
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP23]])
+// CHECK2-NEXT:    br label %[[IF_END14]]
+// CHECK2:       [[IF_END14]]:
+// CHECK2-NEXT:    br label %[[FOR_INC15:.*]]
+// CHECK2:       [[FOR_INC15]]:
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[INC16:%.*]] = add nsw i32 [[TMP24]], 1
+// CHECK2-NEXT:    store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND2]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK2:       [[FOR_END17]]:
+// CHECK2-NEXT:    store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT:    store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0
+// CHECK2-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256
+// CHECK2-NEXT:    store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND19:.*]]
+// CHECK2:       [[FOR_COND19]]:
+// CHECK2-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT:    [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]]
+// CHECK2-NEXT:    br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]]
+// CHECK2:       [[FOR_BODY21]]:
+// CHECK2-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    store ptr [[TMP29]], ptr [[V]], align 8
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT:    [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP30]], double noundef [[TMP32]])
+// CHECK2-NEXT:    br label %[[FOR_INC22:.*]]
+// CHECK2:       [[FOR_INC22]]:
+// CHECK2-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1
+// CHECK2-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND19]]
+// CHECK2:       [[FOR_END23]]:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo5(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT:  [[ENTRY:.*:]]
+// CHECK2-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[__RANGE264:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__BEGIN265:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__END267:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[VV:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[K]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    store i32 512, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2:       [[COND_TRUE]]:
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END:.*]]
+// CHECK2:       [[COND_FALSE]]:
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END]]
+// CHECK2:       [[COND_END]]:
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT:    [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK2-NEXT:    store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT:    store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT:    store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK2-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK2-NEXT:    store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT:    store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK2-NEXT:    [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK2-NEXT:    [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT:    [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK2-NEXT:    [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK2-NEXT:    [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK2-NEXT:    store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT:    store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT:    [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK2-NEXT:    store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT:    store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
+// CHECK2-NEXT:    br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]]
+// CHECK2:       [[COND_TRUE24]]:
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT:    br label %[[COND_END26:.*]]
+// CHECK2:       [[COND_FALSE25]]:
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    br label %[[COND_END26]]
+// CHECK2:       [[COND_END26]]:
+// CHECK2-NEXT:    [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ]
+// CHECK2-NEXT:    store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK2:       [[FOR_COND]]:
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128
+// CHECK2-NEXT:    br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2:       [[FOR_BODY]]:
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP21]])
+// CHECK2-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK2:       [[FOR_INC]]:
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP22]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[I]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK2:       [[FOR_END]]:
+// CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND30:.*]]
+// CHECK2:       [[FOR_COND30]]:
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK2-NEXT:    [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT:    br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]]
+// CHECK2:       [[FOR_BODY32]]:
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT:    [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT:    br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]]
+// CHECK2:       [[IF_THEN]]:
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT:    [[CONV34:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT:    [[CONV35:%.*]] = sext i32 [[TMP28]] to i64
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]]
+// CHECK2-NEXT:    [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]]
+// CHECK2-NEXT:    [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
+// CHECK2-NEXT:    store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT:    [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1
+// CHECK2-NEXT:    [[ADD39:%.*]] = add nsw i32 0, [[MUL38]]
+// CHECK2-NEXT:    store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT:    br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]]
+// CHECK2:       [[IF_THEN41]]:
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]]
+// CHECK2-NEXT:    store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2
+// CHECK2-NEXT:    [[ADD45:%.*]] = add nsw i32 0, [[MUL44]]
+// CHECK2-NEXT:    store i32 [[ADD45]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP37]])
+// CHECK2-NEXT:    br label %[[IF_END]]
+// CHECK2:       [[IF_END]]:
+// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]]
+// CHECK2-NEXT:    br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK2:       [[IF_THEN47]]:
+// CHECK2-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]]
+// CHECK2-NEXT:    [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]]
+// CHECK2-NEXT:    store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1
+// CHECK2-NEXT:    [[ADD51:%.*]] = add nsw i32 0, [[MUL50]]
+// CHECK2-NEXT:    store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP44]])
+// CHECK2-NEXT:    br label %[[IF_END52]]
+// CHECK2:       [[IF_END52]]:
+// CHECK2-NEXT:    br label %[[IF_END53]]
+// CHECK2:       [[IF_END53]]:
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]]
+// CHECK2-NEXT:    br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]]
+// CHECK2:       [[IF_THEN55]]:
+// CHECK2-NEXT:    [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT:    [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT:    [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]]
+// CHECK2-NEXT:    [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]]
+// CHECK2-NEXT:    store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT:    [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT:    [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT:    [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1
+// CHECK2-NEXT:    [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]]
+// CHECK2-NEXT:    store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    store ptr [[TMP52]], ptr [[V]], align 8
+// CHECK2-NEXT:    [[TMP53:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT:    [[TMP54:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT:    [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]])
+// CHECK2-NEXT:    br label %[[IF_END60]]
+// CHECK2:       [[IF_END60]]:
+// CHECK2-NEXT:    br label %[[FOR_INC61:.*]]
+// CHECK2:       [[FOR_INC61]]:
+// CHECK2-NEXT:    [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    [[INC62:%.*]] = add nsw i64 [[TMP56]], 1
+// CHECK2-NEXT:    store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND30]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK2:       [[FOR_END63]]:
+// CHECK2-NEXT:    store i32 37, ptr [[CC]], align 4
+// CHECK2-NEXT:    store ptr [[ARR]], ptr [[__RANGE264]], align 8
+// CHECK2-NEXT:    [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT:    [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0
+// CHECK2-NEXT:    [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256
+// CHECK2-NEXT:    store ptr [[ADD_PTR69]], ptr [[__END267]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND70:.*]]
+// CHECK2:       [[FOR_COND70]]:
+// CHECK2-NEXT:    [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT:    [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8
+// CHECK2-NEXT:    [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]]
+// CHECK2-NEXT:    br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]]
+// CHECK2:       [[FOR_BODY72]]:
+// CHECK2-NEXT:    [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT:    store ptr [[TMP61]], ptr [[VV]], align 8
+// CHECK2-NEXT:    [[TMP62:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT:    [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT:    [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]])
+// CHECK2-NEXT:    br label %[[FOR_INC73:.*]]
+// CHECK2:       [[FOR_INC73]]:
+// CHECK2-NEXT:    [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1
+// CHECK2-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND70]]
+// CHECK2:       [[FOR_END74]]:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @tfoo2(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT:  [[ENTRY:.*:]]
+// CHECK2-NEXT:    call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4)
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_(
+// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat {
+// CHECK2-NEXT:  [[ENTRY:.*:]]
+// CHECK2-NEXT:    [[START_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[STEP_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[START]], ptr [[START_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[END]], ptr [[END_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[STEP]], ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK2-NEXT:    [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK2-NEXT:    [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT:    [[ADD5:%.*]] = add i32 [[TMP8]], 1
+// CHECK2-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP9]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT:    [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
+// CHECK2-NEXT:    [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK2-NEXT:    store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT:    [[ADD15:%.*]] = add i32 [[TMP17]], 1
+// CHECK2-NEXT:    store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK2-NEXT:    store i32 [[ADD16]], ptr [[K]], align 4
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK2-NEXT:    store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK2-NEXT:    store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT:    [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT:    [[SUB24:%.*]] = sub i32 [[SUB23]], 1
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT:    [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]]
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT:    [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]]
+// CHECK2-NEXT:    [[SUB27:%.*]] = sub i32 [[DIV26]], 1
+// CHECK2-NEXT:    store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT:    [[ADD28:%.*]] = add i32 [[TMP29]], 1
+// CHECK2-NEXT:    store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2:       [[COND_TRUE]]:
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END:.*]]
+// CHECK2:       [[COND_FALSE]]:
+// CHECK2-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END]]
+// CHECK2:       [[COND_END]]:
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ]
+// CHECK2-NEXT:    store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT:    [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]]
+// CHECK2-NEXT:    br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
+// CHECK2:       [[COND_TRUE30]]:
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT:    br label %[[COND_END32:.*]]
+// CHECK2:       [[COND_FALSE31]]:
+// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT:    br label %[[COND_END32]]
+// CHECK2:       [[COND_END32]]:
+// CHECK2-NEXT:    [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ]
+// CHECK2-NEXT:    store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK2:       [[FOR_COND]]:
+// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]]
+// CHECK2-NEXT:    br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2:       [[FOR_BODY]]:
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]]
+// CHECK2-NEXT:    br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2:       [[IF_THEN]]:
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]]
+// CHECK2-NEXT:    [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK2-NEXT:    [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]]
+// CHECK2-NEXT:    store i32 [[ADD38]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP49:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP49]])
+// CHECK2-NEXT:    br label %[[IF_END]]
+// CHECK2:       [[IF_END]]:
+// CHECK2-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]]
+// CHECK2-NEXT:    br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
+// CHECK2:       [[IF_THEN40]]:
+// CHECK2-NEXT:    [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT:    [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]]
+// CHECK2-NEXT:    store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT:    [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]]
+// CHECK2-NEXT:    store i32 [[SUB44]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP58:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP58]])
+// CHECK2-NEXT:    br label %[[IF_END45]]
+// CHECK2:       [[IF_END45]]:
+// CHECK2-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT:    [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]]
+// CHECK2-NEXT:    br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK2:       [[IF_THEN47]]:
+// CHECK2-NEXT:    [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT:    [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT:    [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]]
+// CHECK2-NEXT:    [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]]
+// CHECK2-NEXT:    store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT:    [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT:    [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT:    [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT:    [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK2-NEXT:    [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]]
+// CHECK2-NEXT:    store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK2-NEXT:    [[TMP67:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP67]])
+// CHECK2-NEXT:    br label %[[IF_END52]]
+// CHECK2:       [[IF_END52]]:
+// CHECK2-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK2:       [[FOR_INC]]:
+// CHECK2-NEXT:    [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add i32 [[TMP68]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK2:       [[FOR_END]]:
+// CHECK2-NEXT:    ret void
+//
+//.
+// CHECK1: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"}
+// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
+// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
+// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
+// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]}
+// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]}
+//.
+// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"}
+// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
+// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
+// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
+// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]}
+// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]}
+//.
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp
new file mode 100644
index 0000000000000..a8fb672e231d7
--- /dev/null
+++ b/clang/test/OpenMP/fuse_messages.cpp
@@ -0,0 +1,210 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s
+
+void func() {
+
+    // expected-error at +2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}}
+    #pragma omp fuse
+    ;
+
+    // expected-error at +2 {{statement after '#pragma omp fuse' must be a for loop}}
+    #pragma omp fuse
+    {int bar = 0;}
+
+    // expected-error at +4 {{statement after '#pragma omp fuse' must be a for loop}}
+    #pragma omp fuse
+    {
+        for(int i = 0; i < 10; ++i);
+        int x = 2;
+    }
+
+    // expected-error at +2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}}
+    #pragma omp fuse
+    #pragma omp for
+    for (int i = 0; i < 7; ++i)
+        ;
+
+    {
+        // expected-error at +2 {{expected statement}}
+        #pragma omp fuse
+    }
+
+    // expected-warning at +1 {{extra tokens at the end of '#pragma omp fuse' are ignored}}
+    #pragma omp fuse foo
+    {
+        for (int i = 0; i < 7; ++i)
+            ;
+        for(int j = 0; j < 100; ++j);
+
+    }
+
+
+    // expected-error at +1 {{unexpected OpenMP clause 'final' in directive '#pragma omp fuse'}}
+    #pragma omp fuse final(0)
+    {
+        for (int i = 0; i < 7; ++i)
+            ;
+        for(int j = 0; j < 100; ++j);
+
+    }
+
+    //expected-error at +4 {{loop after '#pragma omp fuse' is not in canonical form}}
+    //expected-error at +3 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'i'}}
+    #pragma omp fuse
+    {
+        for(int i = 0; i < 10; i*=2) {
+            ;
+        }
+        for(int j = 0; j < 100; ++j);
+    }
+
+    //expected-error at +2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}}
+    #pragma omp fuse
+    {}
+
+    //expected-error at +3 {{statement after '#pragma omp fuse' must be a for loop}}
+    #pragma omp fuse
+    {
+        #pragma omp unroll full
+        for(int i = 0; i < 10; ++i);
+
+        for(int j = 0; j < 10; ++j);
+    }
+
+    //expected-warning at +2 {{looprange clause selects a single loop, resulting in redundant fusion}}
+    #pragma omp fuse
+    {
+        for(int i = 0; i < 10; ++i);
+    }
+
+    //expected-warning at +1 {{looprange clause selects a single loop, resulting in redundant fusion}}
+    #pragma omp fuse looprange(1, 1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+    }
+
+    //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+    #pragma omp fuse looprange(1, -1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+    }
+
+    //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+    #pragma omp fuse looprange(1, 0)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+    }
+
+    const int x = 1;
+    constexpr int y = 4;
+    //expected-error at +1 {{looprange clause selects loops from 1 to 4 but this exceeds the number of loops (3) in the loop sequence}}
+    #pragma omp fuse looprange(x,y)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+
+    //expected-error at +1 {{looprange clause selects loops from 1 to 420 but this exceeds the number of loops (3) in the loop sequence}}
+    #pragma omp fuse looprange(1,420)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+
+    //expected-error at +1 {{looprange clause selects loops from 1 to 6 but this exceeds the number of loops (5) in the loop sequence}}
+    #pragma omp fuse looprange(1,6)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+        // This fusion results in 2 loops
+        #pragma omp fuse looprange(1,2)
+        {
+            for(int i = 0; i < 10; ++i);
+            for(int j = 0; j < 100; ++j);
+            for(int k = 0; k < 50; ++k);
+        }
+    }
+
+    //expected-error at +1 {{looprange clause selects loops from 2 to 4 but this exceeds the number of loops (3) in the loop sequence}}
+    #pragma omp fuse looprange(2,3)
+    {
+        #pragma omp unroll partial(2)
+        for(int i = 0; i < 10; ++i);
+
+        #pragma omp reverse
+        for(int j = 0; j < 10; ++j);
+
+        #pragma omp fuse
+        {
+            {
+                #pragma omp reverse
+                for(int j = 0; j < 10; ++j);
+            }
+            for(int k = 0; k < 50; ++k);
+        }
+    }
+}
+
+// In a template context, but expression itself not instantiation-dependent
+template <typename T>
+static void templated_func() {
+
+    //expected-warning at +1 {{looprange clause selects a single loop, resulting in redundant fusion}}
+    #pragma omp fuse looprange(2,1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+
+    //expected-error at +1 {{looprange clause selects loops from 3 to 5 but this exceeds the number of loops (3) in the loop sequence}}
+    #pragma omp fuse looprange(3,3)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+
+}
+
+template <int V>
+static void templated_func_value_dependent() {
+
+    //expected-warning at +1 {{looprange clause selects a single loop, resulting in redundant fusion}}
+    #pragma omp fuse looprange(V,1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+}
+
+template <typename T>
+static void templated_func_type_dependent() {
+    constexpr T s = 1;
+
+    //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+    #pragma omp fuse looprange(s,s-1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+}
+
+
+void template_inst() {
+    // expected-note at +1 {{in instantiation of function template specialization 'templated_func<int>' requested here}}
+    templated_func<int>();
+    // expected-note at +1 {{in instantiation of function template specialization 'templated_func_value_dependent<1>' requested here}}
+    templated_func_value_dependent<1>();
+    // expected-note at +1 {{in instantiation of function template specialization 'templated_func_type_dependent<int>' requested here}}
+    templated_func_type_dependent<int>();
+}
+
+
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index b12b1f07c2f70..3e13526e82e50 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2161,6 +2161,9 @@ class EnqueueVisitor : public ConstStmtVisitor<EnqueueVisitor, void>,
   void VisitOMPUnrollDirective(const OMPUnrollDirective *D);
   void VisitOMPReverseDirective(const OMPReverseDirective *D);
   void VisitOMPInterchangeDirective(const OMPInterchangeDirective *D);
+  void VisitOMPCanonicalLoopSequenceTransformationDirective(
+      const OMPCanonicalLoopSequenceTransformationDirective *D);
+  void VisitOMPFuseDirective(const OMPFuseDirective *D);
   void VisitOMPForDirective(const OMPForDirective *D);
   void VisitOMPForSimdDirective(const OMPForSimdDirective *D);
   void VisitOMPSectionsDirective(const OMPSectionsDirective *D);
@@ -2366,6 +2369,11 @@ void OMPClauseEnqueue::VisitOMPPartialClause(const OMPPartialClause *C) {
   Visitor->AddStmt(C->getFactor());
 }
 
+void OMPClauseEnqueue::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) {
+  Visitor->AddStmt(C->getFirst());
+  Visitor->AddStmt(C->getCount());
+}
+
 void OMPClauseEnqueue::VisitOMPAllocatorClause(const OMPAllocatorClause *C) {
   Visitor->AddStmt(C->getAllocator());
 }
@@ -3327,6 +3335,15 @@ void EnqueueVisitor::VisitOMPInterchangeDirective(
   VisitOMPCanonicalLoopNestTransformationDirective(D);
 }
 
+void EnqueueVisitor::VisitOMPCanonicalLoopSequenceTransformationDirective(
+    const OMPCanonicalLoopSequenceTransformationDirective *D) {
+  VisitOMPExecutableDirective(D);
+}
+
+void EnqueueVisitor::VisitOMPFuseDirective(const OMPFuseDirective *D) {
+  VisitOMPCanonicalLoopSequenceTransformationDirective(D);
+}
+
 void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) {
   VisitOMPLoopDirective(D);
 }
@@ -6284,6 +6301,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) {
     return cxstring::createRef("OMPReverseDirective");
   case CXCursor_OMPInterchangeDirective:
     return cxstring::createRef("OMPInterchangeDirective");
+  case CXCursor_OMPFuseDirective:
+    return cxstring::createRef("OMPFuseDirective");
   case CXCursor_OMPForDirective:
     return cxstring::createRef("OMPForDirective");
   case CXCursor_OMPForSimdDirective:
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index 3c4062410eac1..56f113c1dc309 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -687,6 +687,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
   case Stmt::OMPInterchangeDirectiveClass:
     K = CXCursor_OMPInterchangeDirective;
     break;
+  case Stmt::OMPFuseDirectiveClass:
+    K = CXCursor_OMPFuseDirective;
+    break;
   case Stmt::OMPForDirectiveClass:
     K = CXCursor_OMPForDirective;
     break;
diff --git a/flang/include/flang/Lower/OpenMP/Clauses.h b/flang/include/flang/Lower/OpenMP/Clauses.h
index 1ab594ffcd209..2decc8f815cd8 100644
--- a/flang/include/flang/Lower/OpenMP/Clauses.h
+++ b/flang/include/flang/Lower/OpenMP/Clauses.h
@@ -241,6 +241,7 @@ using Initializer = tomp::clause::InitializerT<TypeTy, IdTy, ExprTy>;
 using InReduction = tomp::clause::InReductionT<TypeTy, IdTy, ExprTy>;
 using IsDevicePtr = tomp::clause::IsDevicePtrT<TypeTy, IdTy, ExprTy>;
 using Lastprivate = tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy>;
+using LoopRange = tomp::clause::LoopRangeT<TypeTy, IdTy, ExprTy>;
 using Linear = tomp::clause::LinearT<TypeTy, IdTy, ExprTy>;
 using Link = tomp::clause::LinkT<TypeTy, IdTy, ExprTy>;
 using Map = tomp::clause::MapT<TypeTy, IdTy, ExprTy>;
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index 7170dfb591fae..efea579f3a2d0 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -616,6 +616,7 @@ class ParseTreeDumper {
   NODE_ENUM(OmpLinearModifier, Value)
   NODE(parser, OmpLocator)
   NODE(parser, OmpLocatorList)
+  NODE(parser, OmpLoopRangeClause)
   NODE(parser, OmpLoopDirective)
   NODE(parser, OmpMapClause)
   NODE(OmpMapClause, Modifier)
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index 1d1a4a163084b..f2ad599347995 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4510,6 +4510,15 @@ struct OmpLinearClause {
   std::tuple<OmpObjectList, MODIFIERS(), /*PostModified=*/bool> t;
 };
 
+// Ref: [6.0:207-208]
+//
+// loop-range-clause ->
+//    LOOPRANGE(first, count)                       // since 6.0
+struct OmpLoopRangeClause {
+  TUPLE_CLASS_BOILERPLATE(OmpLoopRangeClause);
+  std::tuple<ScalarIntConstantExpr, ScalarIntConstantExpr> t;
+};
+
 // Ref: [4.5:216-219], [5.0:315-324], [5.1:347-355], [5.2:150-158]
 //
 // map-clause ->
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index 1a16e1c87e250..1b7dc8cdde1b1 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -1031,6 +1031,11 @@ Link make(const parser::OmpClause::Link &inp,
   return Link{/*List=*/makeObjects(inp.v, semaCtx)};
 }
 
+LoopRange make(const parser::OmpClause::Looprange &inp,
+               semantics::SemanticsContext &semaCtx) {
+  llvm_unreachable("Unimplemented: looprange");
+}
+
 Map make(const parser::OmpClause::Map &inp,
          semantics::SemanticsContext &semaCtx) {
   // inp.v -> parser::OmpMapClause
diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp
index 51b49a591b02f..0916b2ce29aa7 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -952,6 +952,9 @@ TYPE_PARSER(
         maybe(":"_tok >> nonemptyList(Parser<OmpLinearClause::Modifier>{})),
         /*PostModified=*/pure(true)))
 
+TYPE_PARSER(construct<OmpLoopRangeClause>(
+    scalarIntConstantExpr, "," >> scalarIntConstantExpr))
+
 // OpenMPv5.2 12.5.2 detach-clause -> DETACH (event-handle)
 TYPE_PARSER(construct<OmpDetachClause>(Parser<OmpObject>{}))
 
@@ -1128,6 +1131,8 @@ TYPE_PARSER( //
                     parenthesized(Parser<OmpLinearClause>{}))) ||
     "LINK" >> construct<OmpClause>(construct<OmpClause::Link>(
                   parenthesized(Parser<OmpObjectList>{}))) ||
+    "LOOPRANGE" >> construct<OmpClause>(construct<OmpClause::Looprange>(
+                       parenthesized(Parser<OmpLoopRangeClause>{}))) ||
     "MAP" >> construct<OmpClause>(construct<OmpClause::Map>(
                  parenthesized(Parser<OmpMapClause>{}))) ||
     "MATCH" >> construct<OmpClause>(construct<OmpClause::Match>(
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 09dcfe60a46bc..773adabb3a0b0 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2328,6 +2328,13 @@ class UnparseVisitor {
       }
     }
   }
+  void Unparse(const OmpLoopRangeClause &x) {
+    Word("LOOPRANGE(");
+    Walk(std::get<0>(x.t));
+    Put(", ");
+    Walk(std::get<1>(x.t));
+    Put(")");
+  }
   void Unparse(const OmpReductionClause &x) {
     using Modifier = OmpReductionClause::Modifier;
     Walk(std::get<std::optional<std::list<Modifier>>>(x.t), ": ");
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index 6dad9a3d0711d..a426d4580e1a1 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -2786,6 +2786,12 @@ CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Collapse, OMPC_collapse)
 CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Safelen, OMPC_safelen)
 CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Simdlen, OMPC_simdlen)
 
+void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) {
+  context_.Say(GetContext().clauseSource,
+      "LOOPRANGE clause is not implemented yet"_err_en_US,
+      ContextDirectiveAsFortran());
+}
+
 // Restrictions specific to each clause are implemented apart from the
 // generalized restrictions.
 
diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
index 8ea50e7e8d416..26e6b699b02c1 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
@@ -1244,6 +1244,15 @@ struct WriteT {
   using EmptyTrait = std::true_type;
 };
 
+// V6: [6.4.7] Looprange clause
+template <typename T, typename I, typename E> struct LoopRangeT {
+  using Begin = E;
+  using End = E;
+
+  using TupleTrait = std::true_type;
+  std::tuple<Begin, End> t;
+};
+
 // ---
 
 template <typename T, typename I, typename E>
@@ -1275,8 +1284,8 @@ using TupleClausesT =
                  DoacrossT<T, I, E>, DynGroupprivateT<T, I, E>, FromT<T, I, E>,
                  GrainsizeT<T, I, E>, IfT<T, I, E>, InitT<T, I, E>,
                  InReductionT<T, I, E>, LastprivateT<T, I, E>, LinearT<T, I, E>,
-                 MapT<T, I, E>, NumTasksT<T, I, E>, OrderT<T, I, E>,
-                 ReductionT<T, I, E>, ScheduleT<T, I, E>,
+                 LoopRangeT<T, I, E>, MapT<T, I, E>, NumTasksT<T, I, E>,
+                 OrderT<T, I, E>, ReductionT<T, I, E>, ScheduleT<T, I, E>,
                  TaskReductionT<T, I, E>, ToT<T, I, E>>;
 
 template <typename T, typename I, typename E>
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 1f1d7d06b4269..54416b1fe1f1e 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -277,6 +277,10 @@ def OMPC_Linear : Clause<[Spelling<"linear">]> {
 def OMPC_Link : Clause<[Spelling<"link">]> {
   let flangClass = "OmpObjectList";
 }
+def OMPC_LoopRange : Clause<[Spelling<"looprange">]> {
+  let clangClass = "OMPLoopRangeClause";
+  let flangClass = "OmpLoopRangeClause";
+}
 def OMPC_Map : Clause<[Spelling<"map">]> {
   let clangClass = "OMPMapClause";
   let flangClass = "OmpMapClause";
@@ -884,6 +888,11 @@ def OMP_Groupprivate : Directive<[Spelling<"groupprivate">]> {
   let category = CA_Declarative;
   let languages = [L_C, L_Fortran];
 }
+def OMP_Fuse : Directive<[Spelling<"fuse">]> {
+  let allowedOnceClauses = [VersionedClause<OMPC_LoopRange, 60>];
+  let association = AS_Block;
+  let category = CA_Executable;
+}
 def OMP_Interchange : Directive<[Spelling<"interchange">]> {
   let allowedOnceClauses = [
     VersionedClause<OMPC_Permutation>,
diff --git a/openmp/runtime/test/transform/fuse/foreach.cpp b/openmp/runtime/test/transform/fuse/foreach.cpp
new file mode 100644
index 0000000000000..176465b201faa
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/foreach.cpp
@@ -0,0 +1,191 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdarg>
+#include <cstdio>
+#include <vector>
+
+struct Reporter {
+  const char *name;
+
+  Reporter(const char *name) : name(name) { print("ctor"); }
+
+  Reporter() : name("<anon>") { print("ctor"); }
+
+  Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+  Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+  ~Reporter() { print("dtor"); }
+
+  const Reporter &operator=(const Reporter &that) {
+    print("copy assign");
+    this->name = that.name;
+    return *this;
+  }
+
+  const Reporter &operator=(Reporter &&that) {
+    print("move assign");
+    this->name = that.name;
+    return *this;
+  }
+
+  struct Iterator {
+    const Reporter *owner;
+    int pos;
+
+    Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+    Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator copy ctor");
+    }
+
+    Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator move ctor");
+    }
+
+    ~Iterator() { owner->print("iterator dtor"); }
+
+    const Iterator &operator=(const Iterator &that) {
+      owner->print("iterator copy assign");
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    const Iterator &operator=(Iterator &&that) {
+      owner->print("iterator move assign");
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    bool operator==(const Iterator &that) const {
+      owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+      return this->pos == that.pos;
+    }
+
+    Iterator &operator++() {
+      owner->print("iterator prefix ++");
+      pos -= 1;
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      owner->print("iterator postfix ++");
+      auto result = *this;
+      pos -= 1;
+      return result;
+    }
+
+    int operator*() const {
+      int result = 2 - pos;
+      owner->print("iterator deref: %i", result);
+      return result;
+    }
+
+    size_t operator-(const Iterator &that) const {
+      int result = (2 - this->pos) - (2 - that.pos);
+      owner->print("iterator distance: %d", result);
+      return result;
+    }
+
+    Iterator operator+(int steps) const {
+      owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+      return Iterator(owner, pos - steps);
+    }
+
+    void print(const char *msg) const { owner->print(msg); }
+  };
+
+  Iterator begin() const {
+    print("begin()");
+    return Iterator(this, 2);
+  }
+
+  Iterator end() const {
+    print("end()");
+    return Iterator(this, -1);
+  }
+
+  void print(const char *msg, ...) const {
+    va_list args;
+    va_start(args, msg);
+    printf("[%s] ", name);
+    vprintf(msg, args);
+    printf("\n");
+    va_end(args);
+  }
+};
+
+int main() {
+  printf("do\n");
+#pragma omp fuse
+  {
+    for (Reporter a{"C"}; auto &&v : Reporter("A"))
+      printf("v=%d\n", v);
+    for (Reporter aa{"D"}; auto &&vv : Reporter("B"))
+      printf("vv=%d\n", vv);
+  }
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+// CHECK: [C] ctor
+// CHECK-NEXT: [A] ctor
+// CHECK-NEXT: [A] end()
+// CHECK-NEXT: [A] begin()
+// CHECK-NEXT: [A] begin()
+// CHECK-NEXT: [A] iterator distance: 3
+// CHECK-NEXT: [D] ctor
+// CHECK-NEXT: [B] ctor
+// CHECK-NEXT: [B] end()
+// CHECK-NEXT: [B] begin()
+// CHECK-NEXT: [B] begin()
+// CHECK-NEXT: [B] iterator distance: 3
+// CHECK-NEXT: [A] iterator advance: 0 += 0
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 0
+// CHECK-NEXT: v=0
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 0
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 0
+// CHECK-NEXT: vv=0
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator advance: 0 += 1
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 1
+// CHECK-NEXT: v=1
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 1
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 1
+// CHECK-NEXT: vv=1
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator advance: 0 += 2
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 2
+// CHECK-NEXT: v=2
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 2
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 2
+// CHECK-NEXT: vv=2
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] dtor
+// CHECK-NEXT: [D] dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] dtor
+// CHECK-NEXT: [C] dtor
+// CHECK-NEXT: done
+
+#endif
diff --git a/openmp/runtime/test/transform/fuse/intfor.c b/openmp/runtime/test/transform/fuse/intfor.c
new file mode 100644
index 0000000000000..b8171b4df7042
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/intfor.c
@@ -0,0 +1,50 @@
+// RUN: %libomp-compile-and-run  | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+  printf("do\n");
+#pragma omp fuse
+  {
+    for (int i = 5; i <= 25; i += 5)
+      printf("i=%d\n", i);
+    for (int j = 10; j < 100; j += 10)
+      printf("j=%d\n", j);
+    for (int k = 10; k > 0; --k)
+      printf("k=%d\n", k);
+  }
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: i=5
+// CHECK-NEXT: j=10
+// CHECK-NEXT: k=10
+// CHECK-NEXT: i=10
+// CHECK-NEXT: j=20
+// CHECK-NEXT: k=9
+// CHECK-NEXT: i=15
+// CHECK-NEXT: j=30
+// CHECK-NEXT: k=8
+// CHECK-NEXT: i=20
+// CHECK-NEXT: j=40
+// CHECK-NEXT: k=7
+// CHECK-NEXT: i=25
+// CHECK-NEXT: j=50
+// CHECK-NEXT: k=6
+// CHECK-NEXT: j=60
+// CHECK-NEXT: k=5
+// CHECK-NEXT: j=70
+// CHECK-NEXT: k=4
+// CHECK-NEXT: j=80
+// CHECK-NEXT: k=3
+// CHECK-NEXT: j=90
+// CHECK-NEXT: k=2
+// CHECK-NEXT: k=1
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/fuse/iterfor.cpp b/openmp/runtime/test/transform/fuse/iterfor.cpp
new file mode 100644
index 0000000000000..552484b2981c4
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/iterfor.cpp
@@ -0,0 +1,194 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdarg>
+#include <cstdio>
+#include <vector>
+
+struct Reporter {
+  const char *name;
+
+  Reporter(const char *name) : name(name) { print("ctor"); }
+
+  Reporter() : name("<anon>") { print("ctor"); }
+
+  Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+  Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+  ~Reporter() { print("dtor"); }
+
+  const Reporter &operator=(const Reporter &that) {
+    print("copy assign");
+    this->name = that.name;
+    return *this;
+  }
+
+  const Reporter &operator=(Reporter &&that) {
+    print("move assign");
+    this->name = that.name;
+    return *this;
+  }
+
+  struct Iterator {
+    const Reporter *owner;
+    int pos;
+
+    Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+    Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator copy ctor");
+    }
+
+    Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator move ctor");
+    }
+
+    ~Iterator() { owner->print("iterator dtor"); }
+
+    const Iterator &operator=(const Iterator &that) {
+      owner->print("iterator copy assign");
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    const Iterator &operator=(Iterator &&that) {
+      owner->print("iterator move assign");
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    bool operator==(const Iterator &that) const {
+      owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+      return this->pos == that.pos;
+    }
+
+    bool operator!=(const Iterator &that) const {
+      owner->print("iterator %d != %d", 2 - this->pos, 2 - that.pos);
+      return this->pos != that.pos;
+    }
+
+    Iterator &operator++() {
+      owner->print("iterator prefix ++");
+      pos -= 1;
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      owner->print("iterator postfix ++");
+      auto result = *this;
+      pos -= 1;
+      return result;
+    }
+
+    int operator*() const {
+      int result = 2 - pos;
+      owner->print("iterator deref: %i", result);
+      return result;
+    }
+
+    size_t operator-(const Iterator &that) const {
+      int result = (2 - this->pos) - (2 - that.pos);
+      owner->print("iterator distance: %d", result);
+      return result;
+    }
+
+    Iterator operator+(int steps) const {
+      owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+      return Iterator(owner, pos - steps);
+    }
+  };
+
+  Iterator begin() const {
+    print("begin()");
+    return Iterator(this, 2);
+  }
+
+  Iterator end() const {
+    print("end()");
+    return Iterator(this, -1);
+  }
+
+  void print(const char *msg, ...) const {
+    va_list args;
+    va_start(args, msg);
+    printf("[%s] ", name);
+    vprintf(msg, args);
+    printf("\n");
+    va_end(args);
+  }
+};
+
+int main() {
+  printf("do\n");
+  Reporter C("C");
+  Reporter D("D");
+#pragma omp fuse
+  {
+    for (auto it = C.begin(); it != C.end(); ++it)
+      printf("v=%d\n", *it);
+
+    for (auto it = D.begin(); it != D.end(); ++it)
+      printf("vv=%d\n", *it);
+  }
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK: [C] ctor
+// CHECK-NEXT: [D] ctor
+// CHECK-NEXT: [C] begin()
+// CHECK-NEXT: [C] begin()
+// CHECK-NEXT: [C] end()
+// CHECK-NEXT: [C] iterator distance: 3
+// CHECK-NEXT: [D] begin()
+// CHECK-NEXT: [D] begin()
+// CHECK-NEXT: [D] end()
+// CHECK-NEXT: [D] iterator distance: 3
+// CHECK-NEXT: [C] iterator advance: 0 += 0
+// CHECK-NEXT: [C] iterator move assign
+// CHECK-NEXT: [C] iterator deref: 0
+// CHECK-NEXT: v=0
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] iterator advance: 0 += 0
+// CHECK-NEXT: [D] iterator move assign
+// CHECK-NEXT: [D] iterator deref: 0
+// CHECK-NEXT: vv=0
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator advance: 0 += 1
+// CHECK-NEXT: [C] iterator move assign
+// CHECK-NEXT: [C] iterator deref: 1
+// CHECK-NEXT: v=1
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] iterator advance: 0 += 1
+// CHECK-NEXT: [D] iterator move assign
+// CHECK-NEXT: [D] iterator deref: 1
+// CHECK-NEXT: vv=1
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator advance: 0 += 2
+// CHECK-NEXT: [C] iterator move assign
+// CHECK-NEXT: [C] iterator deref: 2
+// CHECK-NEXT: v=2
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] iterator advance: 0 += 2
+// CHECK-NEXT: [D] iterator move assign
+// CHECK-NEXT: [D] iterator deref: 2
+// CHECK-NEXT: vv=2
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: done
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] dtor
+// CHECK-NEXT: [C] dtor
diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp
new file mode 100644
index 0000000000000..dcbbdf1b6734e
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp
@@ -0,0 +1,207 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdarg>
+#include <cstdio>
+#include <vector>
+
+struct Reporter {
+  const char *name;
+
+  Reporter(const char *name) : name(name) { print("ctor"); }
+
+  Reporter() : name("<anon>") { print("ctor"); }
+
+  Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+  Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+  ~Reporter() { print("dtor"); }
+
+  const Reporter &operator=(const Reporter &that) {
+    print("copy assign");
+    this->name = that.name;
+    return *this;
+  }
+
+  const Reporter &operator=(Reporter &&that) {
+    print("move assign");
+    this->name = that.name;
+    return *this;
+  }
+
+  struct Iterator {
+    const Reporter *owner;
+    int pos;
+
+    Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+    Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator copy ctor");
+    }
+
+    Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator move ctor");
+    }
+
+    ~Iterator() { owner->print("iterator dtor"); }
+
+    const Iterator &operator=(const Iterator &that) {
+      owner->print("iterator copy assign");
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    const Iterator &operator=(Iterator &&that) {
+      owner->print("iterator move assign");
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    bool operator==(const Iterator &that) const {
+      owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+      return this->pos == that.pos;
+    }
+
+    Iterator &operator++() {
+      owner->print("iterator prefix ++");
+      pos -= 1;
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      owner->print("iterator postfix ++");
+      auto result = *this;
+      pos -= 1;
+      return result;
+    }
+
+    int operator*() const {
+      int result = 2 - pos;
+      owner->print("iterator deref: %i", result);
+      return result;
+    }
+
+    size_t operator-(const Iterator &that) const {
+      int result = (2 - this->pos) - (2 - that.pos);
+      owner->print("iterator distance: %d", result);
+      return result;
+    }
+
+    Iterator operator+(int steps) const {
+      owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+      return Iterator(owner, pos - steps);
+    }
+
+    void print(const char *msg) const { owner->print(msg); }
+  };
+
+  Iterator begin() const {
+    print("begin()");
+    return Iterator(this, 2);
+  }
+
+  Iterator end() const {
+    print("end()");
+    return Iterator(this, -1);
+  }
+
+  void print(const char *msg, ...) const {
+    va_list args;
+    va_start(args, msg);
+    printf("[%s] ", name);
+    vprintf(msg, args);
+    printf("\n");
+    va_end(args);
+  }
+};
+
+int main() {
+  printf("do\n");
+#pragma omp parallel for collapse(2) num_threads(1)
+  for (int i = 0; i < 3; ++i)
+#pragma omp fuse
+  {
+    for (Reporter c{"init-stmt"}; auto &&v : Reporter("range"))
+      printf("i=%d v=%d\n", i, v);
+    for (int vv = 0; vv < 3; ++vv)
+      printf("i=%d vv=%d\n", i, vv);
+  }
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: [init-stmt] ctor
+// CHECK-NEXT: [range] ctor
+// CHECK-NEXT: [range] end()
+// CHECK-NEXT: [range] begin()
+// CHECK-NEXT: [range] begin()
+// CHECK-NEXT: [range] iterator distance: 3
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: i=0 v=0
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=0 vv=0
+// CHECK-NEXT: [range] iterator advance: 0 += 1
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: i=0 v=1
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=0 vv=1
+// CHECK-NEXT: [range] iterator advance: 0 += 2
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: i=0 v=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=0 vv=2
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: i=1 v=0
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=1 vv=0
+// CHECK-NEXT: [range] iterator advance: 0 += 1
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: i=1 v=1
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=1 vv=1
+// CHECK-NEXT: [range] iterator advance: 0 += 2
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: i=1 v=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=1 vv=2
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: i=2 v=0
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=2 vv=0
+// CHECK-NEXT: [range] iterator advance: 0 += 1
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: i=2 v=1
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=2 vv=1
+// CHECK-NEXT: [range] iterator advance: 0 += 2
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: i=2 v=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=2 vv=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] dtor
+// CHECK-NEXT: [init-stmt] dtor
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c
new file mode 100644
index 0000000000000..9630fec50bc20
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c
@@ -0,0 +1,45 @@
+// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdio>
+
+int main() {
+  printf("do\n");
+#pragma omp parallel for collapse(2) num_threads(1)
+  for (int i = 0; i < 3; ++i)
+#pragma omp fuse
+  {
+    for (int j = 0; j < 3; ++j)
+      printf("i=%d j=%d\n", i, j);
+    for (int k = 0; k < 3; ++k)
+      printf("i=%d k=%d\n", i, k);
+  }
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK:      do
+// CHECK-NEXT: i=0 j=0
+// CHECK-NEXT: i=0 k=0
+// CHECK-NEXT: i=0 j=1
+// CHECK-NEXT: i=0 k=1
+// CHECK-NEXT: i=0 j=2
+// CHECK-NEXT: i=0 k=2
+// CHECK-NEXT: i=1 j=0
+// CHECK-NEXT: i=1 k=0
+// CHECK-NEXT: i=1 j=1
+// CHECK-NEXT: i=1 k=1
+// CHECK-NEXT: i=1 j=2
+// CHECK-NEXT: i=1 k=2
+// CHECK-NEXT: i=2 j=0
+// CHECK-NEXT: i=2 k=0
+// CHECK-NEXT: i=2 j=1
+// CHECK-NEXT: i=2 k=1
+// CHECK-NEXT: i=2 j=2
+// CHECK-NEXT: i=2 k=2
+// CHECK-NEXT: done



More information about the Openmp-commits mailing list