[clang] [flang] [llvm] [openmp] [Clang][OpenMP][LoopTransformations] Add support for "#pragma omp fuse" loop transformation directive and "looprange" clause (PR #139293)

Walter J.T.V via cfe-commits cfe-commits at lists.llvm.org
Fri May 23 10:48:05 PDT 2025


https://github.com/eZWALT updated https://github.com/llvm/llvm-project/pull/139293

>From 204d902b738dcd9d260963afab3d4f8f5f1c0066 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:25:33 +0000
Subject: [PATCH 1/9] Add fuse directive patch

---
 clang/include/clang-c/Index.h                 |    4 +
 clang/include/clang/AST/RecursiveASTVisitor.h |    3 +
 clang/include/clang/AST/StmtOpenMP.h          |  105 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |    8 +
 clang/include/clang/Basic/StmtNodes.td        |    1 +
 clang/include/clang/Sema/SemaOpenMP.h         |   27 +
 .../include/clang/Serialization/ASTBitCodes.h |    1 +
 clang/lib/AST/StmtOpenMP.cpp                  |   25 +
 clang/lib/AST/StmtPrinter.cpp                 |    5 +
 clang/lib/AST/StmtProfile.cpp                 |    4 +
 clang/lib/Basic/OpenMPKinds.cpp               |    2 +-
 clang/lib/CodeGen/CGStmt.cpp                  |    3 +
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |    8 +
 clang/lib/CodeGen/CodeGenFunction.h           |    1 +
 clang/lib/Sema/SemaExceptionSpec.cpp          |    1 +
 clang/lib/Sema/SemaOpenMP.cpp                 |  600 +++++++
 clang/lib/Sema/TreeTransform.h                |   11 +
 clang/lib/Serialization/ASTReaderStmt.cpp     |   11 +
 clang/lib/Serialization/ASTWriterStmt.cpp     |    6 +
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  |    1 +
 clang/test/OpenMP/fuse_ast_print.cpp          |  278 +++
 clang/test/OpenMP/fuse_codegen.cpp            | 1511 +++++++++++++++++
 clang/test/OpenMP/fuse_messages.cpp           |   76 +
 clang/tools/libclang/CIndex.cpp               |    7 +
 clang/tools/libclang/CXCursor.cpp             |    3 +
 llvm/include/llvm/Frontend/OpenMP/OMP.td      |    4 +
 .../runtime/test/transform/fuse/foreach.cpp   |  192 +++
 openmp/runtime/test/transform/fuse/intfor.c   |   50 +
 .../runtime/test/transform/fuse/iterfor.cpp   |  194 +++
 .../fuse/parallel-wsloop-collapse-foreach.cpp |  208 +++
 .../fuse/parallel-wsloop-collapse-intfor.c    |   45 +
 31 files changed, 3391 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/OpenMP/fuse_ast_print.cpp
 create mode 100644 clang/test/OpenMP/fuse_codegen.cpp
 create mode 100644 clang/test/OpenMP/fuse_messages.cpp
 create mode 100644 openmp/runtime/test/transform/fuse/foreach.cpp
 create mode 100644 openmp/runtime/test/transform/fuse/intfor.c
 create mode 100644 openmp/runtime/test/transform/fuse/iterfor.cpp
 create mode 100644 openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp
 create mode 100644 openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c

diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
index d30d15e53802a..00046de62a742 100644
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -2162,6 +2162,10 @@ enum CXCursorKind {
    */
   CXCursor_OMPStripeDirective = 310,
 
+  /** OpenMP fuse directive
+   */
+  CXCursor_OMPFuseDirective = 318,
+
   /** OpenACC Compute Construct.
    */
   CXCursor_OpenACCComputeConstruct = 320,
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 23a8c4f1f7380..057e9e346ce4e 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3080,6 +3080,9 @@ DEF_TRAVERSE_STMT(OMPUnrollDirective,
 DEF_TRAVERSE_STMT(OMPReverseDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
+DEF_TRAVERSE_STMT(OMPFuseDirective,
+                  { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
 DEF_TRAVERSE_STMT(OMPInterchangeDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 736bcabbad1f7..dc6f797e24ab8 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -962,6 +962,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
 
   /// Number of loops generated by this loop transformation.
   unsigned NumGeneratedLoops = 0;
+  /// Number of top level canonical loop nests generated by this loop
+  /// transformation
+  unsigned NumGeneratedLoopNests = 0;
 
 protected:
   explicit OMPLoopTransformationDirective(StmtClass SC,
@@ -973,6 +976,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
 
   /// Set the number of loops generated by this loop transformation.
   void setNumGeneratedLoops(unsigned Num) { NumGeneratedLoops = Num; }
+  /// Set the number of top level canonical loop nests generated by this loop
+  /// transformation
+  void setNumGeneratedLoopNests(unsigned Num) { NumGeneratedLoopNests = Num; }
 
 public:
   /// Return the number of associated (consumed) loops.
@@ -981,6 +987,10 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
   /// Return the number of loops generated by this loop transformation.
   unsigned getNumGeneratedLoops() const { return NumGeneratedLoops; }
 
+  /// Return the number of top level canonical loop nests generated by this loop
+  /// transformation
+  unsigned getNumGeneratedLoopNests() const { return NumGeneratedLoopNests; }
+
   /// Get the de-sugared statements after the loop transformation.
   ///
   /// Might be nullptr if either the directive generates no loops and is handled
@@ -995,7 +1005,8 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
     Stmt::StmtClass C = T->getStmtClass();
     return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass ||
            C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass ||
-           C == OMPStripeDirectiveClass;
+           C == OMPStripeDirectiveClass ||
+           C == OMPFuseDirectiveClass;
   }
 };
 
@@ -5562,6 +5573,7 @@ class OMPTileDirective final : public OMPLoopTransformationDirective {
                                        llvm::omp::OMPD_tile, StartLoc, EndLoc,
                                        NumLoops) {
     setNumGeneratedLoops(2 * NumLoops);
+    setNumGeneratedLoopNests(1);
   }
 
   void setPreInits(Stmt *PreInits) {
@@ -5790,7 +5802,11 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective {
   explicit OMPReverseDirective(SourceLocation StartLoc, SourceLocation EndLoc)
       : OMPLoopTransformationDirective(OMPReverseDirectiveClass,
                                        llvm::omp::OMPD_reverse, StartLoc,
-                                       EndLoc, 1) {}
+                                       EndLoc, 1) {
+
+    setNumGeneratedLoopNests(1);
+    setNumGeneratedLoops(1);
+  }
 
   void setPreInits(Stmt *PreInits) {
     Data->getChildren()[PreInitsOffset] = PreInits;
@@ -5857,7 +5873,8 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
       : OMPLoopTransformationDirective(OMPInterchangeDirectiveClass,
                                        llvm::omp::OMPD_interchange, StartLoc,
                                        EndLoc, NumLoops) {
-    setNumGeneratedLoops(3 * NumLoops);
+    setNumGeneratedLoops(NumLoops);
+    setNumGeneratedLoopNests(1);
   }
 
   void setPreInits(Stmt *PreInits) {
@@ -5908,6 +5925,88 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
   }
 };
 
+/// Represents the '#pragma omp fuse' loop transformation directive
+///
+/// \code{c}
+/// #pragma omp fuse
+/// {
+///   for(int i = 0; i < m1; ++i) {...}
+///   for(int j = 0; j < m2; ++j) {...}
+///   ...
+/// }
+/// \endcode
+
+class OMPFuseDirective final : public OMPLoopTransformationDirective {
+  friend class ASTStmtReader;
+  friend class OMPExecutableDirective;
+
+  // Offsets of child members.
+  enum {
+    PreInitsOffset = 0,
+    TransformedStmtOffset,
+  };
+
+  explicit OMPFuseDirective(SourceLocation StartLoc, SourceLocation EndLoc,
+                            unsigned NumLoops)
+      : OMPLoopTransformationDirective(OMPFuseDirectiveClass,
+                                       llvm::omp::OMPD_fuse, StartLoc, EndLoc,
+                                       NumLoops) {
+    setNumGeneratedLoops(1);
+    // TODO: After implementing the looprange clause, change this logic
+    setNumGeneratedLoopNests(1);
+  }
+
+  void setPreInits(Stmt *PreInits) {
+    Data->getChildren()[PreInitsOffset] = PreInits;
+  }
+
+  void setTransformedStmt(Stmt *S) {
+    Data->getChildren()[TransformedStmtOffset] = S;
+  }
+
+public:
+  /// Create a new AST node representation for '#pragma omp fuse'
+  ///
+  /// \param C Context of the AST
+  /// \param StartLoc Location of the introducer (e.g the 'omp' token)
+  /// \param EndLoc Location of the directive's end (e.g the tok::eod)
+  /// \param Clauses The directive's clauses
+  /// \param NumLoops Number of total affected loops
+  /// \param NumLoopNests Number of affected top level canonical loops
+  ///                 (number of items in the 'looprange' clause if present)
+  /// \param AssociatedStmt The outermost associated loop
+  /// \param TransformedStmt The loop nest after fusion, or nullptr in
+  ///                        dependent
+  /// \param PreInits Helper preinits statements for the loop nest
+  static OMPFuseDirective *Create(const ASTContext &C, SourceLocation StartLoc,
+                                  SourceLocation EndLoc,
+                                  ArrayRef<OMPClause *> Clauses,
+                                  unsigned NumLoops, unsigned NumLoopNests,
+                                  Stmt *AssociatedStmt, Stmt *TransformedStmt,
+                                  Stmt *PreInits);
+
+  /// Build an empty '#pragma omp fuse' AST node for deserialization
+  ///
+  /// \param C Context of the AST
+  /// \param NumClauses Number of clauses to allocate
+  /// \param NumLoops Number of associated loops to allocate
+  static OMPFuseDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses,
+                                       unsigned NumLoops);
+
+  /// Gets the associated loops after the transformation. This is the de-sugared
+  /// replacement or nullptr in dependent contexts.
+  Stmt *getTransformedStmt() const {
+    return Data->getChildren()[TransformedStmtOffset];
+  }
+
+  /// Return preinits statement.
+  Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == OMPFuseDirectiveClass;
+  }
+};
+
 /// This represents '#pragma omp scan' directive.
 ///
 /// \code
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 78b36ceb88125..f31b6f8a3b26a 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11558,6 +11558,14 @@ def note_omp_implicit_dsa : Note<
   "implicitly determined as %0">;
 def err_omp_loop_var_dsa : Error<
   "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">;
+def warn_omp_different_loop_ind_var_types : Warning <
+  "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">;
+def err_omp_not_canonical_loop : Error <
+  "loop after '#pragma omp %0' is not in canonical form">;
+def err_omp_not_a_loop_sequence : Error < 
+  "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">;
+def err_omp_empty_loop_sequence : Error <
+  "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">;
 def err_omp_not_for : Error<
   "%select{statement after '#pragma omp %1' must be a for loop|"
   "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">;
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index 9526fa5808aa5..739160342062c 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -234,6 +234,7 @@ def OMPStripeDirective : StmtNode<OMPLoopTransformationDirective>;
 def OMPUnrollDirective : StmtNode<OMPLoopTransformationDirective>;
 def OMPReverseDirective : StmtNode<OMPLoopTransformationDirective>;
 def OMPInterchangeDirective : StmtNode<OMPLoopTransformationDirective>;
+def OMPFuseDirective : StmtNode<OMPLoopTransformationDirective>;
 def OMPForDirective : StmtNode<OMPLoopDirective>;
 def OMPForSimdDirective : StmtNode<OMPLoopDirective>;
 def OMPSectionsDirective : StmtNode<OMPExecutableDirective>;
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index 6498390fe96f7..8d78c2197c89d 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -457,6 +457,13 @@ class SemaOpenMP : public SemaBase {
                                              Stmt *AStmt,
                                              SourceLocation StartLoc,
                                              SourceLocation EndLoc);
+
+  /// Called on well-formed '#pragma omp fuse' after parsing of its
+  /// clauses and the associated statement.
+  StmtResult ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
+                                      Stmt *AStmt, SourceLocation StartLoc,
+                                      SourceLocation EndLoc);
+
   /// Called on well-formed '\#pragma omp for' after parsing
   /// of the associated statement.
   StmtResult
@@ -1480,6 +1487,26 @@ class SemaOpenMP : public SemaBase {
       SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
       Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits);
 
+  /// Analyzes and checks a loop sequence for use by a loop transformation
+  ///
+  /// \param Kind           The loop transformation directive kind.
+  /// \param AStmt          Statement holding the loop sequence to analyze.
+  /// \param LoopSeqSize    [out] Number of top level canonical loops
+  /// \param NumLoops       [out] Number of total canonical loops
+  /// \param LoopHelpers    [out] The multiple loop analyses results.
+  /// \param ForStmts       [out] The multiple Stmt of each For loop.
+  /// \param OriginalInits  [out] Per-loop statements and declarations that
+  ///                       must be executed/declared before entering the loop.
+  /// \param Context        ASTContext used while analyzing the loops.
+  /// \return Whether there was an absence of errors or not
+  bool checkTransformableLoopSequence(
+      OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize,
+      unsigned &NumLoops,
+      SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+      SmallVectorImpl<Stmt *> &ForStmts,
+      SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
+      ASTContext &Context);
+
   /// Helper to keep information about the current `omp begin/end declare
   /// variant` nesting.
   struct OMPDeclareVariantScope {
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 5cb9998126a85..8fe9d8248d66f 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1948,6 +1948,7 @@ enum StmtCode {
   STMT_OMP_UNROLL_DIRECTIVE,
   STMT_OMP_REVERSE_DIRECTIVE,
   STMT_OMP_INTERCHANGE_DIRECTIVE,
+  STMT_OMP_FUSE_DIRECTIVE,
   STMT_OMP_FOR_DIRECTIVE,
   STMT_OMP_FOR_SIMD_DIRECTIVE,
   STMT_OMP_SECTIONS_DIRECTIVE,
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 093e1f659916f..4a6133766ef1c 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -456,6 +456,8 @@ OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc,
   auto *Dir = createDirective<OMPUnrollDirective>(
       C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc);
   Dir->setNumGeneratedLoops(NumGeneratedLoops);
+  // The number of generated loops and loop nests during unroll matches
+  Dir->setNumGeneratedLoopNests(NumGeneratedLoops);
   Dir->setTransformedStmt(TransformedStmt);
   Dir->setPreInits(PreInits);
   return Dir;
@@ -505,6 +507,29 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
       SourceLocation(), SourceLocation(), NumLoops);
 }
 
+OMPFuseDirective *OMPFuseDirective::Create(
+    const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+    ArrayRef<OMPClause *> Clauses, unsigned NumLoops, unsigned NumLoopNests,
+    Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits) {
+
+  OMPFuseDirective *Dir = createDirective<OMPFuseDirective>(
+      C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc,
+      NumLoops);
+  Dir->setTransformedStmt(TransformedStmt);
+  Dir->setPreInits(PreInits);
+  Dir->setNumGeneratedLoopNests(NumLoopNests);
+  Dir->setNumGeneratedLoops(NumLoops);
+  return Dir;
+}
+
+OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C,
+                                                unsigned NumClauses,
+                                                unsigned NumLoops) {
+  return createEmptyDirective<OMPFuseDirective>(
+      C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1,
+      SourceLocation(), SourceLocation(), NumLoops);
+}
+
 OMPForSimdDirective *
 OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
                             SourceLocation EndLoc, unsigned CollapsedNum,
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index dc8af1586624b..12a1d5a943704 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -791,6 +791,11 @@ void StmtPrinter::VisitOMPInterchangeDirective(OMPInterchangeDirective *Node) {
   PrintOMPExecutableDirective(Node);
 }
 
+void StmtPrinter::VisitOMPFuseDirective(OMPFuseDirective *Node) {
+  Indent() << "#pragma omp fuse";
+  PrintOMPExecutableDirective(Node);
+}
+
 void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) {
   Indent() << "#pragma omp for";
   PrintOMPExecutableDirective(Node);
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index f7d1655f67ed1..99d426db985e8 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -1026,6 +1026,10 @@ void StmtProfiler::VisitOMPInterchangeDirective(
   VisitOMPLoopTransformationDirective(S);
 }
 
+void StmtProfiler::VisitOMPFuseDirective(const OMPFuseDirective *S) {
+  VisitOMPLoopTransformationDirective(S);
+}
+
 void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) {
   VisitOMPLoopDirective(S);
 }
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index a451fc7c01841..d172450512f13 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -702,7 +702,7 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
 
 bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) {
   return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse ||
-         DKind == OMPD_interchange || DKind == OMPD_stripe;
+         DKind == OMPD_interchange || DKind == OMPD_stripe || DKind == OMPD_fuse;
 }
 
 bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) {
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 3562b4ea22a24..4a2dc1a537d46 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -233,6 +233,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
   case Stmt::OMPInterchangeDirectiveClass:
     EmitOMPInterchangeDirective(cast<OMPInterchangeDirective>(*S));
     break;
+  case Stmt::OMPFuseDirectiveClass:
+    EmitOMPFuseDirective(cast<OMPFuseDirective>(*S));
+    break;
   case Stmt::OMPForDirectiveClass:
     EmitOMPForDirective(cast<OMPForDirective>(*S));
     break;
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 803c7ed37635e..0c664b0f89044 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -197,6 +197,8 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
     } else if (const auto *Interchange =
                    dyn_cast<OMPInterchangeDirective>(&S)) {
       PreInits = Interchange->getPreInits();
+    } else if (const auto *Fuse = dyn_cast<OMPFuseDirective>(&S)) {
+      PreInits = Fuse->getPreInits();
     } else {
       llvm_unreachable("Unknown loop-based directive kind.");
     }
@@ -2918,6 +2920,12 @@ void CodeGenFunction::EmitOMPInterchangeDirective(
   EmitStmt(S.getTransformedStmt());
 }
 
+void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) {
+  // Emit the de-sugared statement
+  OMPTransformDirectiveScopeRAII FuseScope(*this, &S);
+  EmitStmt(S.getTransformedStmt());
+}
+
 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
   bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 78d71fc822bcb..a983901f560de 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3906,6 +3906,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   void EmitOMPUnrollDirective(const OMPUnrollDirective &S);
   void EmitOMPReverseDirective(const OMPReverseDirective &S);
   void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S);
+  void EmitOMPFuseDirective(const OMPFuseDirective &S);
   void EmitOMPForDirective(const OMPForDirective &S);
   void EmitOMPForSimdDirective(const OMPForSimdDirective &S);
   void EmitOMPScopeDirective(const OMPScopeDirective &S);
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index c83eab53891ca..85a374e6eb9b2 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1491,6 +1491,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
   case Stmt::OMPUnrollDirectiveClass:
   case Stmt::OMPReverseDirectiveClass:
   case Stmt::OMPInterchangeDirectiveClass:
+  case Stmt::OMPFuseDirectiveClass:
   case Stmt::OMPSingleDirectiveClass:
   case Stmt::OMPTargetDataDirectiveClass:
   case Stmt::OMPTargetDirectiveClass:
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index f16f841d62edd..bd8bee64a9d2f 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4404,6 +4404,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind,
   case OMPD_unroll:
   case OMPD_reverse:
   case OMPD_interchange:
+  case OMPD_fuse:
   case OMPD_assume:
     break;
   default:
@@ -6221,6 +6222,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
     Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc,
                                           EndLoc);
     break;
+  case OMPD_fuse:
+    Res =
+        ActOnOpenMPFuseDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc);
+    break;
   case OMPD_for:
     Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc,
                                   VarsWithInheritedDSA);
@@ -14193,6 +14198,8 @@ bool SemaOpenMP::checkTransformableLoopNest(
           DependentPreInits = Dir->getPreInits();
         else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform))
           DependentPreInits = Dir->getPreInits();
+        else if (auto *Dir = dyn_cast<OMPFuseDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
         else
           llvm_unreachable("Unhandled loop transformation");
 
@@ -14203,6 +14210,265 @@ bool SemaOpenMP::checkTransformableLoopNest(
   return Result;
 }
 
+class NestedLoopCounterVisitor
+    : public clang::RecursiveASTVisitor<NestedLoopCounterVisitor> {
+public:
+  explicit NestedLoopCounterVisitor() : NestedLoopCount(0) {}
+
+  bool VisitForStmt(clang::ForStmt *FS) {
+    ++NestedLoopCount;
+    return true;
+  }
+
+  bool VisitCXXForRangeStmt(clang::CXXForRangeStmt *FRS) {
+    ++NestedLoopCount;
+    return true;
+  }
+
+  unsigned getNestedLoopCount() const { return NestedLoopCount; }
+
+private:
+  unsigned NestedLoopCount;
+};
+
+bool SemaOpenMP::checkTransformableLoopSequence(
+    OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize,
+    unsigned &NumLoops,
+    SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+    SmallVectorImpl<Stmt *> &ForStmts,
+    SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
+    ASTContext &Context) {
+
+  // Checks whether the given statement is a compound statement
+  VarsWithInheritedDSAType TmpDSA;
+  if (!isa<CompoundStmt>(AStmt)) {
+    Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
+        << getOpenMPDirectiveName(Kind);
+    return false;
+  }
+  // Callback for updating pre-inits in case there are even more
+  // loop-sequence-generating-constructs inside of the main compound stmt
+  auto OnTransformationCallback =
+      [&OriginalInits](OMPLoopBasedDirective *Transform) {
+        Stmt *DependentPreInits;
+        if (auto *Dir = dyn_cast<OMPTileDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
+        else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
+        else if (auto *Dir = dyn_cast<OMPReverseDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
+        else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
+        else if (auto *Dir = dyn_cast<OMPFuseDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
+        else
+          llvm_unreachable("Unhandled loop transformation");
+
+        appendFlattenedStmtList(OriginalInits.back(), DependentPreInits);
+      };
+
+  // Number of top level canonical loop nests observed (And acts as index)
+  LoopSeqSize = 0;
+  // Number of total observed loops
+  NumLoops = 0;
+
+  // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows
+  // the grammar:
+  //
+  // canonical-loop-sequence:
+  //  {
+  //    loop-sequence+
+  //  }
+  // where loop-sequence can be any of the following:
+  // 1. canonical-loop-sequence
+  // 2. loop-nest
+  // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective)
+  //
+  // To recognise and traverse this structure the following helper functions
+  // have been defined. handleLoopSequence serves as the recursive entry point
+  // and tries to match the input AST to the canonical loop sequence grammar
+  // structure
+
+  auto NLCV = NestedLoopCounterVisitor();
+  // Helper functions to validate canonical loop sequence grammar is valid
+  auto isLoopSequenceDerivation = [](auto *Child) {
+    return isa<ForStmt>(Child) || isa<CXXForRangeStmt>(Child) ||
+           isa<OMPLoopTransformationDirective>(Child);
+  };
+  auto isLoopGeneratingStmt = [](auto *Child) {
+    return isa<OMPLoopTransformationDirective>(Child);
+  };
+
+  // Helper lambda: stores the init (ForStmt) or begin (CXXForRangeStmt)
+  // statement in OriginalInits.back() and the loop itself in ForStmts; warns
+  // when a ForStmt's induction variable type differs from the first one seen.
+  QualType BaseInductionVarType;
+  auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType,
+                              this, &Context, Kind](Stmt *LoopStmt) {
+    if (auto *For = dyn_cast<ForStmt>(LoopStmt)) {
+      OriginalInits.back().push_back(For->getInit());
+      ForStmts.push_back(For);
+      // Extract induction variable
+      if (auto *InitStmt = dyn_cast_or_null<DeclStmt>(For->getInit())) {
+        if (auto *InitDecl = dyn_cast<VarDecl>(InitStmt->getSingleDecl())) {
+          QualType InductionVarType = InitDecl->getType().getCanonicalType();
+
+          // Compare with first loop type; report against the checked directive
+          if (BaseInductionVarType.isNull()) {
+            BaseInductionVarType = InductionVarType;
+          } else if (!Context.hasSameType(BaseInductionVarType,
+                                          InductionVarType)) {
+            Diag(InitDecl->getBeginLoc(),
+                 diag::warn_omp_different_loop_ind_var_types)
+                << getOpenMPDirectiveName(Kind) << BaseInductionVarType
+                << InductionVarType;
+          }
+        }
+      }
+
+    } else {
+      assert(isa<CXXForRangeStmt>(LoopStmt) &&
+             "Expected canonical for or range-based for loops.");
+      auto *CXXFor = cast<CXXForRangeStmt>(LoopStmt);
+      OriginalInits.back().push_back(CXXFor->getBeginStmt());
+      ForStmts.push_back(CXXFor);
+    }
+  };
+  // Helper lambda functions to encapsulate the processing of different
+  // derivations of the canonical loop sequence grammar
+  //
+  // Modularized code for handling loop generation and transformations
+  auto handleLoopGeneration = [&storeLoopStatements, &LoopHelpers,
+                               &OriginalInits, &NumLoops, Kind, &TmpDSA,
+                               &OnTransformationCallback,
+                               this](Stmt *Child) {
+    // Callers only pass loop transformation directives, so use a checked
+    // cast<> rather than dereferencing an unchecked dyn_cast<> result.
+    auto *LoopTransform = cast<OMPLoopTransformationDirective>(Child);
+    Stmt *TransformedStmt = LoopTransform->getTransformedStmt();
+    unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests();
+    // Handle the case where transformed statement is not available due to
+    // dependent contexts
+    if (!TransformedStmt) {
+      if (NumGeneratedLoopNests > 0)
+        return true;
+      // Unroll full
+      Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+          << 0 << getOpenMPDirectiveName(Kind);
+      return false;
+    }
+    // No loop nest was generated (unroll full): nothing to operate on
+    if (NumGeneratedLoopNests == 0) {
+      Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+          << 0 << getOpenMPDirectiveName(Kind);
+      return false;
+    }
+    // Future loop transformations that generate multiple canonical loops
+    if (NumGeneratedLoopNests > 1) {
+      llvm_unreachable("Multiple canonical loop generating transformations "
+                       "like loop splitting are not yet supported");
+    }
+
+    // Process the transformed loop statement
+    Child = TransformedStmt;
+    OriginalInits.emplace_back();
+    LoopHelpers.emplace_back();
+    OnTransformationCallback(LoopTransform);
+
+    // Use back(): LoopSeqSize also counts dependent transformations that
+    // returned early above without appending an entry to LoopHelpers.
+    unsigned IsCanonical =
+        checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack,
+                        TmpDSA, LoopHelpers.back());
+    if (!IsCanonical) {
+      Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop)
+          << getOpenMPDirectiveName(Kind);
+      return false;
+    }
+    storeLoopStatements(TransformedStmt);
+    NumLoops += LoopTransform->getNumGeneratedLoops();
+    return true;
+  };
+
+  // Handles a regular canonical loop: checks it, stores it, counts its loops
+  auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits,
+                            &NumLoops, Kind, &TmpDSA, &NLCV,
+                            this](Stmt *Child) {
+    OriginalInits.emplace_back();
+    LoopHelpers.emplace_back();
+    if (!checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack,
+                         TmpDSA, LoopHelpers.back())) {
+      Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop)
+          << getOpenMPDirectiveName(Kind);
+      return false;
+    }
+    storeLoopStatements(Child);
+    // TraverseStmt() returns a bool and NLCV accumulates: add only the delta
+    const unsigned LoopsBefore = NLCV.getNestedLoopCount();
+    NLCV.TraverseStmt(Child);
+    NumLoops += NLCV.getNestedLoopCount() - LoopsBefore;
+    return true;
+  };
+
+  // Helper function to process a Loop Sequence Recursively
+  auto handleLoopSequence = [&](Stmt *LoopSeqStmt,
+                                auto &handleLoopSequenceCallback) -> bool {
+    for (auto *Child : LoopSeqStmt->children()) {
+      if (!Child)
+        continue;
+
+      // Skip over non-loop-sequence statements
+      if (!isLoopSequenceDerivation(Child)) {
+        Child = Child->IgnoreContainers();
+
+        // Ignore empty compound statement
+        if (!Child)
+          continue;
+
+        // In the case of a nested loop sequence ignoring containers would not
+        // be enough, a recursive traversal of the loop sequence is required
+        if (isa<CompoundStmt>(Child)) {
+          if (!handleLoopSequenceCallback(Child, handleLoopSequenceCallback))
+            return false;
+          // Already handled, skip this child
+          continue;
+        }
+      }
+      // Regular loop sequence handling
+      if (isLoopSequenceDerivation(Child)) {
+        if (isLoopGeneratingStmt(Child)) {
+          if (!handleLoopGeneration(Child)) {
+            return false;
+          }
+        } else {
+          if (!handleRegularLoop(Child)) {
+            return false;
+          }
+        }
+        ++LoopSeqSize;
+      } else {
+        // Report error for invalid statement inside canonical loop sequence
+        Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+            << 0 << getOpenMPDirectiveName(Kind);
+        return false;
+      }
+    }
+    return true;
+  };
+
+  // Recursive entry point to process the main loop sequence
+  if (!handleLoopSequence(AStmt, handleLoopSequence)) {
+    return false;
+  }
+
+  if (LoopSeqSize <= 0) {
+    Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence)
+        << getOpenMPDirectiveName(Kind);
+    return false;
+  }
+  return true;
+}
+
 /// Add preinit statements that need to be propageted from the selected loop.
 static void addLoopPreInits(ASTContext &Context,
                             OMPLoopBasedDirective::HelperExprs &LoopHelper,
@@ -15462,6 +15728,340 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective(
                                          buildPreInits(Context, PreInits));
 }
 
+/// Semantic action for '#pragma omp fuse'. Validates the loop sequence in
+/// \p AStmt and, outside dependent contexts, builds one fused loop running
+/// max(ni_0, ..., ni_k) iterations in which the body of loop k is guarded by
+/// 'fuse.index < ni_k'. Returns StmtError() on failure; otherwise the new
+/// OMPFuseDirective (carrying the fused loop and its pre-init statements).
+StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
+                                                Stmt *AStmt,
+                                                SourceLocation StartLoc,
+                                                SourceLocation EndLoc) {
+  ASTContext &Context = getASTContext();
+  DeclContext *CurrContext = SemaRef.CurContext;
+  Scope *CurScope = SemaRef.getCurScope();
+  CaptureVars CopyTransformer(SemaRef);
+
+  // Ensure the structured block is not empty
+  if (!AStmt) {
+    return StmtError();
+  }
+  // Validate that the potential loop sequence is transformable for fusion
+  // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops
+  SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers;
+  SmallVector<Stmt *> LoopStmts;
+  SmallVector<SmallVector<Stmt *, 0>> OriginalInits;
+
+  unsigned NumLoops = 0;
+  // TODO: Support looprange clause using LoopSeqSize
+  unsigned LoopSeqSize = 0;
+  if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops,
+                                      LoopHelpers, LoopStmts, OriginalInits,
+                                      Context)) {
+    return StmtError();
+  }
+
+  // Defer transformation in dependent contexts
+  if (CurrContext->isDependentContext()) {
+    return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
+                                    NumLoops, 1, AStmt, nullptr, nullptr);
+  }
+  assert(LoopHelpers.size() == LoopSeqSize &&
+         "Expecting loop iteration space dimensionality to match number of "
+         "affected loops");
+  assert(OriginalInits.size() == LoopSeqSize &&
+         "Expecting loop iteration space dimensionality to match number of "
+         "affected loops");
+
+  // PreInits hold a sequence of variable declarations that must be executed
+  // before the fused loop begins. These include bounds, strides, and other
+  // helper variables required for the transformation.
+  SmallVector<Stmt *> PreInits;
+
+  // Select the type with the largest bit width among all induction variables
+  QualType IVType = LoopHelpers[0].IterationVarRef->getType();
+  for (unsigned int I = 1; I < LoopSeqSize; ++I) {
+    QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType();
+    if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) {
+      IVType = CurrentIVType;
+    }
+  }
+  uint64_t IVBitWidth = Context.getIntWidth(IVType);
+
+  // Create pre-init declarations for all loops lower bounds, upper bounds,
+  // strides and num-iterations
+  SmallVector<VarDecl *, 4> LBVarDecls;
+  SmallVector<VarDecl *, 4> STVarDecls;
+  SmallVector<VarDecl *, 4> NIVarDecls;
+  SmallVector<VarDecl *, 4> UBVarDecls;
+  SmallVector<VarDecl *, 4> IVVarDecls;
+
+  // Helper lambda to create variables for bounds, strides, and other
+  // expressions. Generates both the variable declaration and the corresponding
+  // initialization statement.
+  auto CreateHelperVarAndStmt =
+      [&SemaRef = this->SemaRef, &Context, &CopyTransformer,
+       &IVType](Expr *ExprToCopy, const std::string &BaseName, unsigned I,
+                bool NeedsNewVD = false) {
+        Expr *TransformedExpr =
+            AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy));
+        if (!TransformedExpr)
+          return std::pair<VarDecl *, StmtResult>(nullptr, StmtError());
+
+        auto Name = (Twine(".omp.") + BaseName + std::to_string(I)).str();
+
+        VarDecl *VD;
+        if (NeedsNewVD) {
+          VD = buildVarDecl(SemaRef, SourceLocation(), IVType, Name);
+          SemaRef.AddInitializerToDecl(VD, TransformedExpr, false);
+        } else {
+          // Reuse the referenced helper variable, giving it a unique name
+          DeclRefExpr *DRE = cast<DeclRefExpr>(TransformedExpr);
+          VD = cast<VarDecl>(DRE->getDecl());
+          VD->setDeclName(&SemaRef.PP.getIdentifierTable().get(Name));
+        }
+        // Create the corresponding declaration statement
+        StmtResult DeclStmt = new (Context) class DeclStmt(
+            DeclGroupRef(VD), SourceLocation(), SourceLocation());
+        return std::make_pair(VD, DeclStmt);
+      };
+
+  // Process each single loop to generate and collect declarations
+  // and statements for all helper expressions
+  for (unsigned int I = 0; I < LoopSeqSize; ++I) {
+    addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
+                    PreInits);
+
+    auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", I);
+    auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", I);
+    auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", I);
+    auto [NIVD, NIDStmt] =
+        CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", I, true);
+    auto [IVVD, IVDStmt] =
+        CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", I);
+
+    if (!UBVD || !LBVD || !STVD || !NIVD || !IVVD)
+      return StmtError();
+
+    UBVarDecls.push_back(UBVD);
+    LBVarDecls.push_back(LBVD);
+    STVarDecls.push_back(STVD);
+    NIVarDecls.push_back(NIVD);
+    IVVarDecls.push_back(IVVD);
+
+    PreInits.push_back(UBDStmt.get());
+    PreInits.push_back(LBDStmt.get());
+    PreInits.push_back(STDStmt.get());
+    PreInits.push_back(NIDStmt.get());
+    PreInits.push_back(IVDStmt.get());
+  }
+
+  auto MakeVarDeclRef = [&SemaRef = this->SemaRef](VarDecl *VD) {
+    return buildDeclRefExpr(SemaRef, VD, VD->getType(), VD->getLocation(),
+                            false);
+  };
+
+  // Next, the final fused loop is created. It has the following shape
+  // (considering the selected loops):
+  //
+  // for (fuse.index = 0; fuse.index < max(ni0, ni1..., nik); ++fuse.index) {
+  //    if (fuse.index < ni0){
+  //      iv0 = lb0 + st0 * fuse.index;
+  //      original.index0 = iv0
+  //      body(0);
+  //    }
+  //    if (fuse.index < ni1){
+  //      iv1 = lb1 + st1 * fuse.index;
+  //      original.index1 = iv1
+  //      body(1);
+  //    }
+  //
+  //    ...
+  //
+  //    if (fuse.index < nik){
+  //      ivk = lbk + stk * fuse.index;
+  //      original.indexk = ivk
+  //      body(k);
+  //    }
+  // }
+
+  // 1. Create the initialized fuse index
+  const std::string IndexName = Twine(".omp.fuse.index").str();
+  Expr *InitVal = IntegerLiteral::Create(Context, llvm::APInt(IVBitWidth, 0),
+                                         IVType, SourceLocation());
+  VarDecl *IndexDecl =
+      buildVarDecl(SemaRef, {}, IVType, IndexName, nullptr, nullptr);
+  SemaRef.AddInitializerToDecl(IndexDecl, InitVal, false);
+  StmtResult InitStmt = new (Context)
+      DeclStmt(DeclGroupRef(IndexDecl), SourceLocation(), SourceLocation());
+
+  if (!InitStmt.isUsable())
+    return StmtError();
+
+  auto MakeIVRef = [&SemaRef = this->SemaRef, IndexDecl, IVType,
+                    Loc = InitVal->getExprLoc()]() {
+    return buildDeclRefExpr(SemaRef, IndexDecl, IVType, Loc, false);
+  };
+
+  // 2. Iteratively compute the max number of logical iterations Max(NI_1, NI_2,
+  // ..., NI_k)
+  //
+  // This loop accumulates the maximum value across multiple expressions,
+  // ensuring each step constructs a unique AST node for correctness. By using
+  // intermediate temporary variables and conditional operators, we maintain
+  // distinct nodes and avoid duplicating subtrees. For instance, max(a,b,c):
+  //   .omp.temp.1 = a
+  //   .omp.temp.2 = .omp.temp.1 > b ? .omp.temp.1 : b
+  //   .omp.fuse.max = .omp.temp.2 > c ? .omp.temp.2 : c
+
+  ExprResult MaxExpr;
+  for (unsigned I = 0; I < LoopSeqSize; ++I) {
+    DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[I]);
+    QualType NITy = NIRef->getType();
+
+    if (MaxExpr.isUnset()) {
+      // Initialize MaxExpr with the first NI expression
+      MaxExpr = NIRef;
+    } else {
+      // Create a new accumulator variable t_i = MaxExpr
+      std::string TempName = (Twine(".omp.temp.") + Twine(I)).str();
+      VarDecl *TempDecl =
+          buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr);
+      TempDecl->setInit(MaxExpr.get());
+      DeclRefExpr *TempRef =
+          buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false);
+      DeclRefExpr *TempRef2 =
+          buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false);
+      // Add a DeclStmt to PreInits to ensure the variable is declared.
+      StmtResult TempStmt = new (Context)
+          DeclStmt(DeclGroupRef(TempDecl), SourceLocation(), SourceLocation());
+
+      if (!TempStmt.isUsable())
+        return StmtError();
+      PreInits.push_back(TempStmt.get());
+
+      // Build MaxExpr <-(MaxExpr > NIRef ? MaxExpr : NIRef)
+      ExprResult Comparison =
+          SemaRef.BuildBinOp(nullptr, SourceLocation(), BO_GT, TempRef, NIRef);
+      // Handle any errors in Comparison creation
+      if (!Comparison.isUsable())
+        return StmtError();
+
+      DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[I]);
+      // Update MaxExpr using a conditional expression to hold the max value
+      MaxExpr = new (Context) ConditionalOperator(
+          Comparison.get(), SourceLocation(), TempRef2, SourceLocation(),
+          NIRef2, NITy, VK_LValue, OK_Ordinary);
+
+      if (!MaxExpr.isUsable())
+        return StmtError();
+    }
+  }
+  if (!MaxExpr.isUsable())
+    return StmtError();
+
+  // 3. Declare the max variable
+  const std::string MaxName = Twine(".omp.fuse.max").str();
+  VarDecl *MaxDecl =
+      buildVarDecl(SemaRef, {}, IVType, MaxName, nullptr, nullptr);
+  MaxDecl->setInit(MaxExpr.get());
+  DeclRefExpr *MaxRef = buildDeclRefExpr(SemaRef, MaxDecl, IVType, {}, false);
+  StmtResult MaxStmt = new (Context)
+      DeclStmt(DeclGroupRef(MaxDecl), SourceLocation(), SourceLocation());
+
+  if (MaxStmt.isInvalid())
+    return StmtError();
+  PreInits.push_back(MaxStmt.get());
+
+  // 4. Create condition Expr: index < n_max
+  ExprResult CondExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT,
+                                           MakeIVRef(), MaxRef);
+  if (!CondExpr.isUsable())
+    return StmtError();
+  // 5. Increment Expr: ++index
+  ExprResult IncrExpr =
+      SemaRef.BuildUnaryOp(CurScope, SourceLocation(), UO_PreInc, MakeIVRef());
+  if (!IncrExpr.isUsable())
+    return StmtError();
+
+  // 6. Build the Fused Loop Body
+  // The final fused loop iterates over the maximum logical range. Inside the
+  // loop, each original loop's index is calculated dynamically, and its body
+  // is executed conditionally.
+  //
+  // Each sub-loop's body is guarded by a conditional statement to ensure
+  // it executes only within its logical iteration range:
+  //
+  //    if (fuse.index < ni_k){
+  //      iv_k = lb_k + st_k * fuse.index;
+  //      original.index = iv_k
+  //      body(k);
+  //    }
+
+  CompoundStmt *FusedBody = nullptr;
+  SmallVector<Stmt *, 4> FusedBodyStmts;
+  for (unsigned I = 0; I < LoopSeqSize; ++I) {
+    // Assignment of the original sub-loop index to compute the logical index
+    // IV_k = LB_k + omp.fuse.index * ST_k
+    ExprResult IdxExpr =
+        SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul,
+                           MakeVarDeclRef(STVarDecls[I]), MakeIVRef());
+    if (!IdxExpr.isUsable())
+      return StmtError();
+    IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add,
+                                 MakeVarDeclRef(LBVarDecls[I]), IdxExpr.get());
+    if (!IdxExpr.isUsable())
+      return StmtError();
+    IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign,
+                                 MakeVarDeclRef(IVVarDecls[I]), IdxExpr.get());
+    if (!IdxExpr.isUsable())
+      return StmtError();
+
+    // Update the original i_k = IV_k
+    SmallVector<Stmt *, 4> BodyStmts;
+    BodyStmts.push_back(IdxExpr.get());
+    llvm::append_range(BodyStmts, LoopHelpers[I].Updates);
+
+    if (auto *SourceCXXFor = dyn_cast<CXXForRangeStmt>(LoopStmts[I]))
+      BodyStmts.push_back(SourceCXXFor->getLoopVarStmt());
+
+    Stmt *Body = (isa<ForStmt>(LoopStmts[I]))
+                     ? cast<ForStmt>(LoopStmts[I])->getBody()
+                     : cast<CXXForRangeStmt>(LoopStmts[I])->getBody();
+
+    BodyStmts.push_back(Body);
+
+    CompoundStmt *CombinedBody =
+        CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(),
+                             SourceLocation(), SourceLocation());
+    ExprResult Condition =
+        SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(),
+                           MakeVarDeclRef(NIVarDecls[I]));
+
+    if (!Condition.isUsable())
+      return StmtError();
+
+    IfStmt *IfStatement = IfStmt::Create(
+        Context, SourceLocation(), IfStatementKind::Ordinary, nullptr, nullptr,
+        Condition.get(), SourceLocation(), SourceLocation(), CombinedBody,
+        SourceLocation(), nullptr);
+
+    FusedBodyStmts.push_back(IfStatement);
+  }
+  FusedBody = CompoundStmt::Create(Context, FusedBodyStmts, FPOptionsOverride(),
+                                   SourceLocation(), SourceLocation());
+
+  // 7. Construct the final fused loop
+  ForStmt *FusedForStmt = new (Context)
+      ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, IncrExpr.get(),
+              FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(),
+              IncrExpr.get()->getEndLoc());
+
+  return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops,
+                                  1, AStmt, FusedForStmt,
+                                  buildPreInits(Context, PreInits));
+}
+
 OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind,
                                                    Expr *Expr,
                                                    SourceLocation StartLoc,
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 335e21d927b76..034b0c8243667 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -9666,6 +9666,17 @@ StmtResult TreeTransform<Derived>::TransformOMPInterchangeDirective(
   return Res;
 }
 
+template <typename Derived>
+StmtResult
+TreeTransform<Derived>::TransformOMPFuseDirective(OMPFuseDirective *D) {
+  DeclarationNameInfo DirName;
+  getDerived().getSema().OpenMP().StartOpenMPDSABlock(
+      D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc());
+  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+  getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get());
+  return Res;
+}
+
 template <typename Derived>
 StmtResult
 TreeTransform<Derived>::TransformOMPForDirective(OMPForDirective *D) {
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 0ba0378754eb4..6762d11d6b73e 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2449,6 +2449,7 @@ void ASTStmtReader::VisitOMPLoopTransformationDirective(
     OMPLoopTransformationDirective *D) {
   VisitOMPLoopBasedDirective(D);
   D->setNumGeneratedLoops(Record.readUInt32());
+  D->setNumGeneratedLoopNests(Record.readUInt32());
 }
 
 void ASTStmtReader::VisitOMPTileDirective(OMPTileDirective *D) {
@@ -2471,6 +2472,10 @@ void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) {
   VisitOMPLoopTransformationDirective(D);
 }
 
+void ASTStmtReader::VisitOMPFuseDirective(OMPFuseDirective *D) {
+  VisitOMPLoopTransformationDirective(D);
+}
+
 void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) {
   VisitOMPLoopDirective(D);
   D->setHasCancel(Record.readBool());
@@ -3613,6 +3618,12 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       S = OMPReverseDirective::CreateEmpty(Context);
       break;
     }
+    case STMT_OMP_FUSE_DIRECTIVE: {
+      unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
+      unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];
+      S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops);
+      break;
+    }
 
     case STMT_OMP_INTERCHANGE_DIRECTIVE: {
       unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index b9eabd5ddb64c..8b909d5c93686 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2454,6 +2454,7 @@ void ASTStmtWriter::VisitOMPLoopTransformationDirective(
     OMPLoopTransformationDirective *D) {
   VisitOMPLoopBasedDirective(D);
   Record.writeUInt32(D->getNumGeneratedLoops());
+  Record.writeUInt32(D->getNumGeneratedLoopNests());
 }
 
 void ASTStmtWriter::VisitOMPTileDirective(OMPTileDirective *D) {
@@ -2481,6 +2482,11 @@ void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) {
   Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE;
 }
 
+void ASTStmtWriter::VisitOMPFuseDirective(OMPFuseDirective *D) {
+  VisitOMPLoopTransformationDirective(D);
+  Code = serialization::STMT_OMP_FUSE_DIRECTIVE;
+}
+
 void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) {
   VisitOMPLoopDirective(D);
   Record.writeBool(D->hasCancel());
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 1afd4b52eb354..036945b2d1700 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1817,6 +1817,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
     case Stmt::OMPStripeDirectiveClass:
     case Stmt::OMPTileDirectiveClass:
     case Stmt::OMPInterchangeDirectiveClass:
+    case Stmt::OMPFuseDirectiveClass:
     case Stmt::OMPInteropDirectiveClass:
     case Stmt::OMPDispatchDirectiveClass:
     case Stmt::OMPMaskedDirectiveClass:
diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp
new file mode 100644
index 0000000000000..43ce815dab024
--- /dev/null
+++ b/clang/test/OpenMP/fuse_ast_print.cpp
@@ -0,0 +1,278 @@
+// Check no warnings/errors
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+// Check AST and unparsing 
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-dump  %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT
+
+// Check same results after serialization round-trip 
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -emit-pch -o %t %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-print    %s | FileCheck %s --check-prefix=PRINT
+
+#ifndef HEADER
+#define HEADER 
+
+// placeholder for loop body code
+extern "C" void body(...);
+
+// PRINT-LABEL: void foo1(
+// DUMP-LABEL: FunctionDecl {{.*}} foo1
+void foo1() {
+    // PRINT: #pragma omp fuse
+    // DUMP:  OMPFuseDirective
+    #pragma omp fuse 
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: for (int i = 0; i < 10; i += 2)
+        // DUMP: ForStmt
+        for (int i = 0; i < 10; i += 2)
+            // PRINT: body(i)
+            // DUMP: CallExpr
+            body(i);
+        // PRINT: for (int j = 10; j > 0; --j)
+        // DUMP: ForStmt
+        for (int j = 10; j > 0; --j)
+            // PRINT: body(j)
+            // DUMP: CallExpr
+            body(j);
+        // PRINT: for (int k = 0; k <= 10; ++k)
+        // DUMP: ForStmt
+        for (int k = 0; k <= 10; ++k)
+            // PRINT: body(k)
+            // DUMP: CallExpr
+            body(k);
+
+    }
+
+}
+
+// PRINT-LABEL: void foo2(
+// DUMP-LABEL: FunctionDecl {{.*}} foo2
+void foo2() {
+    // PRINT: #pragma omp unroll partial(4)
+    // DUMP: OMPUnrollDirective
+    // DUMP-NEXT: OMPPartialClause
+    // DUMP-NEXT: ConstantExpr
+    // DUMP-NEXT: value: Int 4
+    // DUMP-NEXT: IntegerLiteral {{.*}} 4
+    #pragma omp unroll partial(4)
+    // PRINT: #pragma omp fuse
+    // DUMP-NEXT: OMPFuseDirective 
+    #pragma omp fuse 
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: for (int i = 0; i < 10; i += 2)
+        // DUMP: ForStmt
+        for (int i = 0; i < 10; i += 2)
+            // PRINT: body(i)
+            // DUMP: CallExpr
+            body(i);
+        // PRINT: for (int j = 10; j > 0; --j)
+        // DUMP: ForStmt
+        for (int j = 10; j > 0; --j)
+            // PRINT: body(j)
+            // DUMP: CallExpr
+            body(j);  
+    }    
+    
+}
+
+//PRINT-LABEL: void foo3(
+//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo3
+template<int Factor1, int Factor2> 
+void foo3() {
+    // PRINT:  #pragma omp fuse
+    // DUMP: OMPFuseDirective
+    #pragma omp fuse 
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: #pragma omp unroll partial(Factor1)
+        // DUMP: OMPUnrollDirective
+        #pragma omp unroll partial(Factor1)
+        // PRINT: for (int i = 0; i < 12; i += 1)
+        // DUMP: ForStmt
+        for (int i = 0; i < 12; i += 1)
+            // PRINT: body(i)
+            // DUMP: CallExpr
+            body(i);
+        // PRINT: #pragma omp unroll partial(Factor2)
+        // DUMP: OMPUnrollDirective
+        #pragma omp unroll partial(Factor2)
+        // PRINT: for (int k = 0; k <= 10; ++k)
+        // DUMP: ForStmt
+        for (int k = 0; k <= 10; ++k)
+            // PRINT: body(k)
+            // DUMP: CallExpr
+            body(k);
+
+    }
+}
+
+// Also test instantiating the template.
+void tfoo3() {
+    foo3<4,2>();
+}
+
+//PRINT-LABEL: void foo4(
+//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo4
+template<typename T, T Step> 
+void foo4(int start, int end) {
+    // PRINT:  #pragma omp fuse
+    // DUMP: OMPFuseDirective
+    #pragma omp fuse 
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: for (T i = start; i < end; i += Step)
+        // DUMP: ForStmt
+        for (T i = start; i < end; i += Step)
+            // PRINT: body(i)
+            // DUMP: CallExpr
+            body(i);
+
+        // PRINT: for (T j = end; j > start; j -= Step)
+        // DUMP: ForStmt 
+        for (T j = end; j > start; j -= Step) {
+            // PRINT: body(j)
+            // DUMP: CallExpr
+            body(j);
+        }
+
+    }
+}
+
+// Also test instantiating the template.
+void tfoo4() {
+    foo4<int, 4>(0, 64);
+}
+
+
+
+// PRINT-LABEL: void foo5(
+// DUMP-LABEL: FunctionDecl {{.*}} foo5
+void foo5() {
+    double arr[128], arr2[128];
+    // PRINT: #pragma omp fuse
+    // DUMP:  OMPFuseDirective
+    #pragma omp fuse 
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT-NEXT: for (auto &&a : arr)
+        // DUMP-NEXT: CXXForRangeStmt
+        for (auto &&a: arr)
+            // PRINT: body(a)
+            // DUMP: CallExpr
+            body(a);
+        // PRINT: for (double v = 42; auto &&b : arr)
+        // DUMP: CXXForRangeStmt
+        for (double v = 42; auto &&b: arr)
+            // PRINT: body(b, v);
+            // DUMP: CallExpr
+            body(b, v);
+        // PRINT: for (auto &&c : arr2)
+        // DUMP: CXXForRangeStmt
+        for (auto &&c: arr2)
+            // PRINT: body(c)
+            // DUMP: CallExpr
+            body(c);
+
+    }
+
+}
+
+// PRINT-LABEL: void foo6(
+// DUMP-LABEL: FunctionDecl {{.*}} foo6
+void foo6() {
+    // PRINT: #pragma omp fuse
+    // DUMP: OMPFuseDirective
+    #pragma omp fuse 
+    // PRINT: {
+    // DUMP: CompoundStmt
+    {
+        // PRINT: #pragma omp fuse
+        // DUMP: OMPFuseDirective
+        #pragma omp fuse 
+        // PRINT: {
+        // DUMP: CompoundStmt
+        {
+            // PRINT: for (int i = 0; i <= 10; ++i)
+            // DUMP: ForStmt
+            for (int i = 0; i <= 10; ++i)
+                body(i);
+            // PRINT: for (int j = 0; j < 100; ++j)
+            // DUMP: ForStmt
+            for(int j = 0; j < 100; ++j)
+                body(j);
+        }
+        // PRINT: #pragma omp unroll partial(4)
+        // DUMP: OMPUnrollDirective
+        #pragma omp unroll partial(4)
+        // PRINT: for (int k = 0; k < 250; ++k)
+        // DUMP: ForStmt
+        for (int k = 0; k < 250; ++k) 
+            body(k);
+    }
+}
+
+// PRINT-LABEL: void foo7(
+// DUMP-LABEL: FunctionDecl {{.*}} foo7
+void foo7() {
+    // PRINT: #pragma omp fuse
+    // DUMP:  OMPFuseDirective
+    #pragma omp fuse 
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: {
+        // DUMP: CompoundStmt   
+        {
+            // PRINT: {
+            // DUMP: CompoundStmt   
+            {
+                // PRINT: for (int i = 0; i < 10; i += 2)
+                // DUMP: ForStmt
+                for (int i = 0; i < 10; i += 2)
+                    // PRINT: body(i)
+                    // DUMP: CallExpr
+                    body(i);
+                // PRINT: for (int j = 10; j > 0; --j)
+                // DUMP: ForStmt
+                for (int j = 10; j > 0; --j)
+                    // PRINT: body(j)
+                    // DUMP: CallExpr
+                    body(j);
+            }
+        }
+        // PRINT: {
+        // DUMP: CompoundStmt   
+        {
+            // PRINT: {
+            // DUMP: CompoundStmt   
+            {
+                // PRINT: {
+                // DUMP: CompoundStmt   
+                {
+                    // PRINT: for (int k = 0; k <= 10; ++k)
+                    // DUMP: ForStmt
+                    for (int k = 0; k <= 10; ++k)
+                        // PRINT: body(k)
+                        // DUMP: CallExpr
+                        body(k);
+                }
+            }
+        }
+    }
+
+}
+
+
+
+
+
+#endif
\ No newline at end of file
diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp
new file mode 100644
index 0000000000000..6c1e21092da43
--- /dev/null
+++ b/clang/test/OpenMP/fuse_codegen.cpp
@@ -0,0 +1,1511 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5
+// expected-no-diagnostics
+
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2
+
+#ifndef HEADER
+#define HEADER
+
+// Placeholder for loop body code; variadic so each test can pass any argument list.
+extern "C" void body(...) {}
+
+extern "C" void foo1(int start1, int end1, int step1, int start2, int end2, int step2) { // Fuses two loops whose bounds and steps are function parameters.
+    int i,j; // Loop counters are declared outside the loops.
+    #pragma omp fuse
+    {
+        for(i = start1; i < end1; i += step1) body(i);
+        for(j = start2; j < end2; j += step2) body(j);
+    }
+
+}
+
+template <typename T>
+void foo2(T start, T end, T step){ // Fuses three loops whose counter type is the template parameter T.
+    T i,j,k;
+    #pragma omp fuse
+    {
+        for(i = start; i < end; i += step) body(i); // Increments by step from start to end.
+        for(j = end; j > start; j -= step) body(j); // Decrements by step from end to start.
+        for(k = start+step; k < end+step; k += step) body(k); // Same stride, both bounds offset by step.
+    }
+}
+
+extern "C" void tfoo2() { // Calls foo2<int>, which instantiates the template so its code is emitted.
+    foo2<int>(0, 64, 4);
+}
+
+extern "C" void foo3() { // Fuses the loop produced by a nested fuse with two range-based for loops.
+    double arr[256];
+    #pragma omp fuse
+    {
+        #pragma omp fuse
+        {
+            for(int i = 0; i < 128; ++i) body(i);
+            for(int j = 0; j < 256; j+=2) body(j);
+        }
+        for(int c = 42; auto &&v: arr) body(c,v); // Range-based for with an init-statement (C++20).
+        for(int cc = 37; auto &&vv: arr) body(cc, vv); // Second range-based for over the same array.
+    }
+}
+
+
+#endif
+// CHECK1-LABEL: define dso_local void @body(
+// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] {
+// CHECK1-NEXT:  [[ENTRY:.*:]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo1(
+// CHECK1-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  [[ENTRY:.*:]]
+// CHECK1-NEXT:    [[START1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[END1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[STEP1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[START2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[END2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[STEP2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 [[START1]], ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[END1]], ptr [[END1_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[START2]], ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[END2]], ptr [[END2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT:    [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK1-NEXT:    [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT:    [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK1-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP10]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT:    [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK1-NEXT:    [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK1-NEXT:    store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT:    store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT:    [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK1-NEXT:    store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]]
+// CHECK1-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1:       [[COND_TRUE]]:
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END:.*]]
+// CHECK1:       [[COND_FALSE]]:
+// CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END]]
+// CHECK1:       [[COND_END]]:
+// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ]
+// CHECK1-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK1:       [[FOR_COND]]:
+// CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT:    br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1:       [[FOR_BODY]]:
+// CHECK1-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT:    br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1:       [[IF_THEN]]:
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]]
+// CHECK1-NEXT:    [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]]
+// CHECK1-NEXT:    [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]]
+// CHECK1-NEXT:    store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP35]])
+// CHECK1-NEXT:    br label %[[IF_END]]
+// CHECK1:       [[IF_END]]:
+// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]]
+// CHECK1-NEXT:    br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
+// CHECK1:       [[IF_THEN22]]:
+// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT:    [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]]
+// CHECK1-NEXT:    store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT:    [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]]
+// CHECK1-NEXT:    store i32 [[ADD26]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP44]])
+// CHECK1-NEXT:    br label %[[IF_END27]]
+// CHECK1:       [[IF_END27]]:
+// CHECK1-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK1:       [[FOR_INC]]:
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add i32 [[TMP45]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1:       [[FOR_END]]:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @tfoo2(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT:  [[ENTRY:.*:]]
+// CHECK1-NEXT:    call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4)
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_(
+// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat {
+// CHECK1-NEXT:  [[ENTRY:.*:]]
+// CHECK1-NEXT:    [[START_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[STEP_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_UB2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 [[START]], ptr [[START_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[END]], ptr [[END_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[STEP]], ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT:    [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK1-NEXT:    [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT:    [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK1-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP10]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT:    [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK1-NEXT:    [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK1-NEXT:    store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT:    store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT:    [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK1-NEXT:    store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT:    store i32 [[ADD16]], ptr [[K]], align 4
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK1-NEXT:    store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK1-NEXT:    store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK1-NEXT:    [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT:    [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT:    [[SUB24:%.*]] = sub i32 [[SUB23]], 1
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT:    [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]]
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT:    [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]]
+// CHECK1-NEXT:    [[SUB27:%.*]] = sub i32 [[DIV26]], 1
+// CHECK1-NEXT:    store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT:    store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB2]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST2]], align 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT:    [[ADD28:%.*]] = add i32 [[TMP32]], 1
+// CHECK1-NEXT:    store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1:       [[COND_TRUE]]:
+// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END:.*]]
+// CHECK1:       [[COND_FALSE]]:
+// CHECK1-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END]]
+// CHECK1:       [[COND_END]]:
+// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ]
+// CHECK1-NEXT:    store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT:    [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]]
+// CHECK1-NEXT:    br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
+// CHECK1:       [[COND_TRUE30]]:
+// CHECK1-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT:    br label %[[COND_END32:.*]]
+// CHECK1:       [[COND_FALSE31]]:
+// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT:    br label %[[COND_END32]]
+// CHECK1:       [[COND_END32]]:
+// CHECK1-NEXT:    [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ]
+// CHECK1-NEXT:    store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK1:       [[FOR_COND]]:
+// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT:    br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1:       [[FOR_BODY]]:
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]]
+// CHECK1-NEXT:    br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1:       [[IF_THEN]]:
+// CHECK1-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK1-NEXT:    [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]]
+// CHECK1-NEXT:    [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]]
+// CHECK1-NEXT:    store i32 [[ADD38]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP52:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP52]])
+// CHECK1-NEXT:    br label %[[IF_END]]
+// CHECK1:       [[IF_END]]:
+// CHECK1-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT:    br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
+// CHECK1:       [[IF_THEN40]]:
+// CHECK1-NEXT:    [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT:    [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]]
+// CHECK1-NEXT:    store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]]
+// CHECK1-NEXT:    [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]]
+// CHECK1-NEXT:    store i32 [[SUB44]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP61:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP61]])
+// CHECK1-NEXT:    br label %[[IF_END45]]
+// CHECK1:       [[IF_END45]]:
+// CHECK1-NEXT:    [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT:    [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]]
+// CHECK1-NEXT:    br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK1:       [[IF_THEN47]]:
+// CHECK1-NEXT:    [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK1-NEXT:    [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK1-NEXT:    [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK1-NEXT:    [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]]
+// CHECK1-NEXT:    store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
+// CHECK1-NEXT:    [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT:    [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK1-NEXT:    [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT:    [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]]
+// CHECK1-NEXT:    [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]]
+// CHECK1-NEXT:    store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK1-NEXT:    [[TMP70:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP70]])
+// CHECK1-NEXT:    br label %[[IF_END52]]
+// CHECK1:       [[IF_END52]]:
+// CHECK1-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK1:       [[FOR_INC]]:
+// CHECK1-NEXT:    [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add i32 [[TMP71]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1:       [[FOR_END]]:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo3(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT:  [[ENTRY:.*:]]
+// CHECK1-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_UB03:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB04:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST05:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI06:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV07:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_UB117:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_LB118:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_ST119:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_NI120:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV122:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[__RANGE223:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__END224:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__BEGIN227:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_UB2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_LB2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_ST2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_NI2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[VV:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    store i32 127, ptr [[DOTOMP_UB0]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT:    store i32 127, ptr [[DOTOMP_UB1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    store i32 128, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1:       [[COND_TRUE]]:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END:.*]]
+// CHECK1:       [[COND_FALSE]]:
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END]]
+// CHECK1:       [[COND_END]]:
+// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB04]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST05]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT:    [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK1-NEXT:    store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8
+// CHECK1-NEXT:    store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT:    store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK1-NEXT:    store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK1-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT:    store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK1-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK1-NEXT:    [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK1-NEXT:    [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT:    [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1
+// CHECK1-NEXT:    [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1
+// CHECK1-NEXT:    [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1
+// CHECK1-NEXT:    store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK1-NEXT:    store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8
+// CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_LB118]], align 8
+// CHECK1-NEXT:    store i64 1, ptr [[DOTOMP_ST119]], align 8
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK1-NEXT:    [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1
+// CHECK1-NEXT:    store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT:    store i32 37, ptr [[CC]], align 4
+// CHECK1-NEXT:    store ptr [[ARR]], ptr [[__RANGE223]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK1-NEXT:    [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256
+// CHECK1-NEXT:    store ptr [[ADD_PTR26]], ptr [[__END224]], align 8
+// CHECK1-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8
+// CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8
+// CHECK1-NEXT:    store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8
+// CHECK1-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8
+// CHECK1-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT:    [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]]
+// CHECK1-NEXT:    [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8
+// CHECK1-NEXT:    [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1
+// CHECK1-NEXT:    [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1
+// CHECK1-NEXT:    [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1
+// CHECK1-NEXT:    [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1
+// CHECK1-NEXT:    store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK1-NEXT:    store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8
+// CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_LB2]], align 8
+// CHECK1-NEXT:    store i64 1, ptr [[DOTOMP_ST2]], align 8
+// CHECK1-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK1-NEXT:    [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1
+// CHECK1-NEXT:    store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
+// CHECK1-NEXT:    store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK1-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT:    [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]]
+// CHECK1-NEXT:    br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]]
+// CHECK1:       [[COND_TRUE44]]:
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK1-NEXT:    br label %[[COND_END46:.*]]
+// CHECK1:       [[COND_FALSE45]]:
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT:    br label %[[COND_END46]]
+// CHECK1:       [[COND_END46]]:
+// CHECK1-NEXT:    [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ]
+// CHECK1-NEXT:    store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT:    [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]]
+// CHECK1-NEXT:    br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]]
+// CHECK1:       [[COND_TRUE50]]:
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT:    br label %[[COND_END52:.*]]
+// CHECK1:       [[COND_FALSE51]]:
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT:    br label %[[COND_END52]]
+// CHECK1:       [[COND_END52]]:
+// CHECK1-NEXT:    [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ]
+// CHECK1-NEXT:    store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8
+// CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK1:       [[FOR_COND]]:
+// CHECK1-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT:    [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8
+// CHECK1-NEXT:    [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT:    br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1:       [[FOR_BODY]]:
+// CHECK1-NEXT:    [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT:    [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
+// CHECK1-NEXT:    [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]]
+// CHECK1-NEXT:    br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]]
+// CHECK1:       [[IF_THEN]]:
+// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4
+// CHECK1-NEXT:    [[CONV57:%.*]] = sext i32 [[TMP38]] to i64
+// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4
+// CHECK1-NEXT:    [[CONV58:%.*]] = sext i32 [[TMP39]] to i64
+// CHECK1-NEXT:    [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]]
+// CHECK1-NEXT:    [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]]
+// CHECK1-NEXT:    [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32
+// CHECK1-NEXT:    store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4
+// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4
+// CHECK1-NEXT:    [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1
+// CHECK1-NEXT:    [[ADD62:%.*]] = add nsw i32 0, [[MUL61]]
+// CHECK1-NEXT:    store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT:    br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]]
+// CHECK1:       [[IF_THEN64]]:
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]]
+// CHECK1-NEXT:    [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]]
+// CHECK1-NEXT:    store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1
+// CHECK1-NEXT:    [[ADD68:%.*]] = add nsw i32 0, [[MUL67]]
+// CHECK1-NEXT:    store i32 [[ADD68]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP48:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP48]])
+// CHECK1-NEXT:    br label %[[IF_END]]
+// CHECK1:       [[IF_END]]:
+// CHECK1-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]]
+// CHECK1-NEXT:    br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]]
+// CHECK1:       [[IF_THEN70]]:
+// CHECK1-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]]
+// CHECK1-NEXT:    [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]]
+// CHECK1-NEXT:    store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2
+// CHECK1-NEXT:    [[ADD74:%.*]] = add nsw i32 0, [[MUL73]]
+// CHECK1-NEXT:    store i32 [[ADD74]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP55:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP55]])
+// CHECK1-NEXT:    br label %[[IF_END75]]
+// CHECK1:       [[IF_END75]]:
+// CHECK1-NEXT:    br label %[[IF_END76]]
+// CHECK1:       [[IF_END76]]:
+// CHECK1-NEXT:    [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT:    [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT:    [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT:    br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]]
+// CHECK1:       [[IF_THEN78]]:
+// CHECK1-NEXT:    [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8
+// CHECK1-NEXT:    [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8
+// CHECK1-NEXT:    [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT:    [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]]
+// CHECK1-NEXT:    [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]]
+// CHECK1-NEXT:    store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8
+// CHECK1-NEXT:    [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK1-NEXT:    [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8
+// CHECK1-NEXT:    [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1
+// CHECK1-NEXT:    [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]]
+// CHECK1-NEXT:    store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    store ptr [[TMP63]], ptr [[V]], align 8
+// CHECK1-NEXT:    [[TMP64:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT:    [[TMP65:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT:    [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP64]], double noundef [[TMP66]])
+// CHECK1-NEXT:    br label %[[IF_END83]]
+// CHECK1:       [[IF_END83]]:
+// CHECK1-NEXT:    [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT:    [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT:    [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]]
+// CHECK1-NEXT:    br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]]
+// CHECK1:       [[IF_THEN85]]:
+// CHECK1-NEXT:    [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK1-NEXT:    [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK1-NEXT:    [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT:    [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]]
+// CHECK1-NEXT:    [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]]
+// CHECK1-NEXT:    store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT:    [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT:    [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT:    [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1
+// CHECK1-NEXT:    [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]]
+// CHECK1-NEXT:    store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8
+// CHECK1-NEXT:    [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8
+// CHECK1-NEXT:    store ptr [[TMP74]], ptr [[VV]], align 8
+// CHECK1-NEXT:    [[TMP75:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT:    [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT:    [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP75]], double noundef [[TMP77]])
+// CHECK1-NEXT:    br label %[[IF_END90]]
+// CHECK1:       [[IF_END90]]:
+// CHECK1-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK1:       [[FOR_INC]]:
+// CHECK1-NEXT:    [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i64 [[TMP78]], 1
+// CHECK1-NEXT:    store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK1:       [[FOR_END]]:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @body(
+// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] {
+// CHECK2-NEXT:  [[ENTRY:.*:]]
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo1(
+// CHECK2-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT:  [[ENTRY:.*:]]
+// CHECK2-NEXT:    [[START1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[END1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[STEP1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[START2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[END2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[STEP2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[START1]], ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[END1]], ptr [[END1_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[START2]], ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[END2]], ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK2-NEXT:    [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK2-NEXT:    [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT:    [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK2-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP10]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK2-NEXT:    [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK2-NEXT:    [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK2-NEXT:    store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT:    store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT:    [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK2-NEXT:    store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]]
+// CHECK2-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2:       [[COND_TRUE]]:
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END:.*]]
+// CHECK2:       [[COND_FALSE]]:
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END]]
+// CHECK2:       [[COND_END]]:
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ]
+// CHECK2-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK2:       [[FOR_COND]]:
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT:    br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2:       [[FOR_BODY]]:
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT:    br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2:       [[IF_THEN]]:
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]]
+// CHECK2-NEXT:    [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]]
+// CHECK2-NEXT:    [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]]
+// CHECK2-NEXT:    store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP35:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP35]])
+// CHECK2-NEXT:    br label %[[IF_END]]
+// CHECK2:       [[IF_END]]:
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]]
+// CHECK2-NEXT:    br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
+// CHECK2:       [[IF_THEN22]]:
+// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]]
+// CHECK2-NEXT:    [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]]
+// CHECK2-NEXT:    store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT:    [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]]
+// CHECK2-NEXT:    store i32 [[ADD26]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP44]])
+// CHECK2-NEXT:    br label %[[IF_END27]]
+// CHECK2:       [[IF_END27]]:
+// CHECK2-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK2:       [[FOR_INC]]:
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add i32 [[TMP45]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK2:       [[FOR_END]]:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo3(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT:  [[ENTRY:.*:]]
+// CHECK2-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_UB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST05:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI06:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_IV07:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_UB117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_LB118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_ST119:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_NI120:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_IV122:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[__RANGE223:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__END224:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__BEGIN227:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_UB2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_LB2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_ST2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_NI2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_IV2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[VV:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    store i32 127, ptr [[DOTOMP_UB0]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT:    store i32 127, ptr [[DOTOMP_UB1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    store i32 128, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2:       [[COND_TRUE]]:
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END:.*]]
+// CHECK2:       [[COND_FALSE]]:
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END]]
+// CHECK2:       [[COND_END]]:
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB04]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST05]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK2-NEXT:    [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK2-NEXT:    store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8
+// CHECK2-NEXT:    store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT:    store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK2-NEXT:    store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK2-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT:    store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK2-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK2-NEXT:    [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK2-NEXT:    [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT:    [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1
+// CHECK2-NEXT:    [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1
+// CHECK2-NEXT:    [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1
+// CHECK2-NEXT:    store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK2-NEXT:    store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8
+// CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_LB118]], align 8
+// CHECK2-NEXT:    store i64 1, ptr [[DOTOMP_ST119]], align 8
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK2-NEXT:    [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1
+// CHECK2-NEXT:    store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT:    store i32 37, ptr [[CC]], align 4
+// CHECK2-NEXT:    store ptr [[ARR]], ptr [[__RANGE223]], align 8
+// CHECK2-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK2-NEXT:    [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256
+// CHECK2-NEXT:    store ptr [[ADD_PTR26]], ptr [[__END224]], align 8
+// CHECK2-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8
+// CHECK2-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8
+// CHECK2-NEXT:    store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8
+// CHECK2-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8
+// CHECK2-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT:    [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]]
+// CHECK2-NEXT:    [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8
+// CHECK2-NEXT:    [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1
+// CHECK2-NEXT:    [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1
+// CHECK2-NEXT:    [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1
+// CHECK2-NEXT:    [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1
+// CHECK2-NEXT:    store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK2-NEXT:    store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8
+// CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT:    store i64 1, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK2-NEXT:    [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1
+// CHECK2-NEXT:    store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
+// CHECK2-NEXT:    store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT:    [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]]
+// CHECK2-NEXT:    br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]]
+// CHECK2:       [[COND_TRUE44]]:
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK2-NEXT:    br label %[[COND_END46:.*]]
+// CHECK2:       [[COND_FALSE45]]:
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT:    br label %[[COND_END46]]
+// CHECK2:       [[COND_END46]]:
+// CHECK2-NEXT:    [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ]
+// CHECK2-NEXT:    store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT:    [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]]
+// CHECK2-NEXT:    br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]]
+// CHECK2:       [[COND_TRUE50]]:
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT:    br label %[[COND_END52:.*]]
+// CHECK2:       [[COND_FALSE51]]:
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT:    br label %[[COND_END52]]
+// CHECK2:       [[COND_END52]]:
+// CHECK2-NEXT:    [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ]
+// CHECK2-NEXT:    store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8
+// CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK2:       [[FOR_COND]]:
+// CHECK2-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT:    [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8
+// CHECK2-NEXT:    [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT:    br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2:       [[FOR_BODY]]:
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
+// CHECK2-NEXT:    [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]]
+// CHECK2-NEXT:    br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]]
+// CHECK2:       [[IF_THEN]]:
+// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4
+// CHECK2-NEXT:    [[CONV57:%.*]] = sext i32 [[TMP38]] to i64
+// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4
+// CHECK2-NEXT:    [[CONV58:%.*]] = sext i32 [[TMP39]] to i64
+// CHECK2-NEXT:    [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]]
+// CHECK2-NEXT:    [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]]
+// CHECK2-NEXT:    [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32
+// CHECK2-NEXT:    store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4
+// CHECK2-NEXT:    [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1
+// CHECK2-NEXT:    [[ADD62:%.*]] = add nsw i32 0, [[MUL61]]
+// CHECK2-NEXT:    store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT:    br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]]
+// CHECK2:       [[IF_THEN64]]:
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]]
+// CHECK2-NEXT:    [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]]
+// CHECK2-NEXT:    store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1
+// CHECK2-NEXT:    [[ADD68:%.*]] = add nsw i32 0, [[MUL67]]
+// CHECK2-NEXT:    store i32 [[ADD68]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP48:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP48]])
+// CHECK2-NEXT:    br label %[[IF_END]]
+// CHECK2:       [[IF_END]]:
+// CHECK2-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]]
+// CHECK2-NEXT:    br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]]
+// CHECK2:       [[IF_THEN70]]:
+// CHECK2-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]]
+// CHECK2-NEXT:    [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]]
+// CHECK2-NEXT:    store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2
+// CHECK2-NEXT:    [[ADD74:%.*]] = add nsw i32 0, [[MUL73]]
+// CHECK2-NEXT:    store i32 [[ADD74]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP55:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP55]])
+// CHECK2-NEXT:    br label %[[IF_END75]]
+// CHECK2:       [[IF_END75]]:
+// CHECK2-NEXT:    br label %[[IF_END76]]
+// CHECK2:       [[IF_END76]]:
+// CHECK2-NEXT:    [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT:    [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT:    [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT:    br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]]
+// CHECK2:       [[IF_THEN78]]:
+// CHECK2-NEXT:    [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8
+// CHECK2-NEXT:    [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8
+// CHECK2-NEXT:    [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT:    [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]]
+// CHECK2-NEXT:    [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]]
+// CHECK2-NEXT:    store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8
+// CHECK2-NEXT:    [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK2-NEXT:    [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8
+// CHECK2-NEXT:    [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1
+// CHECK2-NEXT:    [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]]
+// CHECK2-NEXT:    store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    store ptr [[TMP63]], ptr [[V]], align 8
+// CHECK2-NEXT:    [[TMP64:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT:    [[TMP65:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT:    [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP64]], double noundef [[TMP66]])
+// CHECK2-NEXT:    br label %[[IF_END83]]
+// CHECK2:       [[IF_END83]]:
+// CHECK2-NEXT:    [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT:    [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT:    [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]]
+// CHECK2-NEXT:    br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]]
+// CHECK2:       [[IF_THEN85]]:
+// CHECK2-NEXT:    [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT:    [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT:    [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT:    [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]]
+// CHECK2-NEXT:    [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]]
+// CHECK2-NEXT:    store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT:    [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT:    [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT:    [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1
+// CHECK2-NEXT:    [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]]
+// CHECK2-NEXT:    store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8
+// CHECK2-NEXT:    [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8
+// CHECK2-NEXT:    store ptr [[TMP74]], ptr [[VV]], align 8
+// CHECK2-NEXT:    [[TMP75:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT:    [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT:    [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP75]], double noundef [[TMP77]])
+// CHECK2-NEXT:    br label %[[IF_END90]]
+// CHECK2:       [[IF_END90]]:
+// CHECK2-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK2:       [[FOR_INC]]:
+// CHECK2-NEXT:    [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i64 [[TMP78]], 1
+// CHECK2-NEXT:    store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK2:       [[FOR_END]]:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @tfoo2(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT:  [[ENTRY:.*:]]
+// CHECK2-NEXT:    call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4)
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_(
+// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat {
+// CHECK2-NEXT:  [[ENTRY:.*:]]
+// CHECK2-NEXT:    [[START_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[STEP_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_UB2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[START]], ptr [[START_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[END]], ptr [[END_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[STEP]], ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK2-NEXT:    [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK2-NEXT:    [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT:    [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK2-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP10]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK2-NEXT:    [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK2-NEXT:    [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK2-NEXT:    store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT:    store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT:    [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK2-NEXT:    store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK2-NEXT:    store i32 [[ADD16]], ptr [[K]], align 4
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK2-NEXT:    store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK2-NEXT:    store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT:    [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT:    [[SUB24:%.*]] = sub i32 [[SUB23]], 1
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT:    [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]]
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT:    [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]]
+// CHECK2-NEXT:    [[SUB27:%.*]] = sub i32 [[DIV26]], 1
+// CHECK2-NEXT:    store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT:    store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT:    [[ADD28:%.*]] = add i32 [[TMP32]], 1
+// CHECK2-NEXT:    store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2:       [[COND_TRUE]]:
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END:.*]]
+// CHECK2:       [[COND_FALSE]]:
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END]]
+// CHECK2:       [[COND_END]]:
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ]
+// CHECK2-NEXT:    store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT:    [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]]
+// CHECK2-NEXT:    br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
+// CHECK2:       [[COND_TRUE30]]:
+// CHECK2-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT:    br label %[[COND_END32:.*]]
+// CHECK2:       [[COND_FALSE31]]:
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT:    br label %[[COND_END32]]
+// CHECK2:       [[COND_END32]]:
+// CHECK2-NEXT:    [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ]
+// CHECK2-NEXT:    store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK2:       [[FOR_COND]]:
+// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT:    br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2:       [[FOR_BODY]]:
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]]
+// CHECK2-NEXT:    br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2:       [[IF_THEN]]:
+// CHECK2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK2-NEXT:    [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]]
+// CHECK2-NEXT:    [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]]
+// CHECK2-NEXT:    store i32 [[ADD38]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP52:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP52]])
+// CHECK2-NEXT:    br label %[[IF_END]]
+// CHECK2:       [[IF_END]]:
+// CHECK2-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT:    br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
+// CHECK2:       [[IF_THEN40]]:
+// CHECK2-NEXT:    [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT:    [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]]
+// CHECK2-NEXT:    store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]]
+// CHECK2-NEXT:    [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]]
+// CHECK2-NEXT:    store i32 [[SUB44]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP61:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP61]])
+// CHECK2-NEXT:    br label %[[IF_END45]]
+// CHECK2:       [[IF_END45]]:
+// CHECK2-NEXT:    [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT:    [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]]
+// CHECK2-NEXT:    br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK2:       [[IF_THEN47]]:
+// CHECK2-NEXT:    [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT:    [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT:    [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK2-NEXT:    [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]]
+// CHECK2-NEXT:    store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT:    [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT:    [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT:    [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT:    [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]]
+// CHECK2-NEXT:    [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]]
+// CHECK2-NEXT:    store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK2-NEXT:    [[TMP70:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP70]])
+// CHECK2-NEXT:    br label %[[IF_END52]]
+// CHECK2:       [[IF_END52]]:
+// CHECK2-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK2:       [[FOR_INC]]:
+// CHECK2-NEXT:    [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add i32 [[TMP71]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK2:       [[FOR_END]]:
+// CHECK2-NEXT:    ret void
+//
+//.
+// CHECK1: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"}
+// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
+// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+//.
+// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"}
+// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
+// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+//.
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp
new file mode 100644
index 0000000000000..50dedfd2c0dc6
--- /dev/null
+++ b/clang/test/OpenMP/fuse_messages.cpp
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s
+
+void func() {
+
+    // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}}
+    #pragma omp fuse 
+    ;
+
+    // expected-error@+2 {{statement after '#pragma omp fuse' must be a for loop}}
+    #pragma omp fuse 
+    {int bar = 0;}
+
+    // expected-error@+4 {{statement after '#pragma omp fuse' must be a for loop}}
+    #pragma omp fuse 
+    {
+        for(int i = 0; i < 10; ++i);
+        int x = 2;
+    }
+
+    // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}}
+    #pragma omp fuse 
+    #pragma omp for 
+    for (int i = 0; i < 7; ++i)
+        ;
+
+    {
+        // expected-error@+2 {{expected statement}}
+        #pragma omp fuse
+    }
+
+    // expected-warning@+1 {{extra tokens at the end of '#pragma omp fuse' are ignored}}
+    #pragma omp fuse foo
+    {
+        for (int i = 0; i < 7; ++i)
+            ;
+    }
+
+
+    // expected-error@+1 {{unexpected OpenMP clause 'final' in directive '#pragma omp fuse'}}
+    #pragma omp fuse final(0) 
+    {
+        for (int i = 0; i < 7; ++i)
+            ;
+    }
+
+    //expected-error@+4 {{loop after '#pragma omp fuse' is not in canonical form}}
+    //expected-error@+3 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'i'}}
+    #pragma omp fuse 
+    {
+        for(int i = 0; i < 10; i*=2) {
+            ;
+        }
+    }
+
+    //expected-error@+2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}}
+    #pragma omp fuse 
+    {}
+
+    //expected-error@+3 {{statement after '#pragma omp fuse' must be a for loop}}
+    #pragma omp fuse 
+    {
+        #pragma omp unroll full 
+        for(int i = 0; i < 10; ++i);
+        
+        for(int j = 0; j < 10; ++j);
+    }
+
+    //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'unsigned int'}}
+    //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'long long'}}
+    #pragma omp fuse 
+    {
+        for(int i = 0; i < 10; ++i);
+        for(unsigned int j = 0; j < 10; ++j);
+        for(long long k = 0; k < 100; ++k);
+    }
+}
\ No newline at end of file
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 06a17006fdee9..fd788ac3d69d4 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2206,6 +2206,7 @@ class EnqueueVisitor : public ConstStmtVisitor<EnqueueVisitor, void>,
   void VisitOMPUnrollDirective(const OMPUnrollDirective *D);
   void VisitOMPReverseDirective(const OMPReverseDirective *D);
   void VisitOMPInterchangeDirective(const OMPInterchangeDirective *D);
+  void VisitOMPFuseDirective(const OMPFuseDirective *D);
   void VisitOMPForDirective(const OMPForDirective *D);
   void VisitOMPForSimdDirective(const OMPForSimdDirective *D);
   void VisitOMPSectionsDirective(const OMPSectionsDirective *D);
@@ -3364,6 +3365,10 @@ void EnqueueVisitor::VisitOMPInterchangeDirective(
   VisitOMPLoopTransformationDirective(D);
 }
 
+void EnqueueVisitor::VisitOMPFuseDirective(const OMPFuseDirective *D) {
+  VisitOMPLoopTransformationDirective(D);
+}
+
 void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) {
   VisitOMPLoopDirective(D);
 }
@@ -6317,6 +6322,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) {
     return cxstring::createRef("OMPReverseDirective");
   case CXCursor_OMPInterchangeDirective:
     return cxstring::createRef("OMPInterchangeDirective");
+  case CXCursor_OMPFuseDirective:
+    return cxstring::createRef("OMPFuseDirective");
   case CXCursor_OMPForDirective:
     return cxstring::createRef("OMPForDirective");
   case CXCursor_OMPForSimdDirective:
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index 635d03a88d105..709fa60d28d8d 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -688,6 +688,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
   case Stmt::OMPInterchangeDirectiveClass:
     K = CXCursor_OMPInterchangeDirective;
     break;
+  case Stmt::OMPFuseDirectiveClass:
+    K = CXCursor_OMPFuseDirective;
+    break;
   case Stmt::OMPForDirectiveClass:
     K = CXCursor_OMPForDirective;
     break;
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 0af4b436649a3..8286cfcadaafd 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -852,6 +852,10 @@ def OMP_For : Directive<"for"> {
   let category = CA_Executable;
   let languages = [L_C];
 }
+def OMP_Fuse : Directive<"fuse"> {
+  let association = AS_Loop;
+  let category = CA_Executable;
+}
 def OMP_Interchange : Directive<"interchange"> {
   let allowedOnceClauses = [
     VersionedClause<OMPC_Permutation>,
diff --git a/openmp/runtime/test/transform/fuse/foreach.cpp b/openmp/runtime/test/transform/fuse/foreach.cpp
new file mode 100644
index 0000000000000..cabf4bf8a511d
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/foreach.cpp
@@ -0,0 +1,192 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdarg>
+#include <cstdio>
+#include <vector>
+
+struct Reporter {
+  const char *name;
+
+  Reporter(const char *name) : name(name) { print("ctor"); }
+
+  Reporter() : name("<anon>") { print("ctor"); }
+
+  Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+  Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+  ~Reporter() { print("dtor"); }
+
+  const Reporter &operator=(const Reporter &that) {
+    print("copy assign");
+    this->name = that.name;
+    return *this;
+  }
+
+  const Reporter &operator=(Reporter &&that) {
+    print("move assign");
+    this->name = that.name;
+    return *this;
+  }
+
+  struct Iterator {
+    const Reporter *owner;
+    int pos;
+
+    Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+    Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator copy ctor");
+    }
+
+    Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator move ctor");
+    }
+
+    ~Iterator() { owner->print("iterator dtor"); }
+
+    const Iterator &operator=(const Iterator &that) {
+      owner->print("iterator copy assign");
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    const Iterator &operator=(Iterator &&that) {
+      owner->print("iterator move assign");
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    bool operator==(const Iterator &that) const {
+      owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+      return this->pos == that.pos;
+    }
+
+    Iterator &operator++() {
+      owner->print("iterator prefix ++");
+      pos -= 1;
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      owner->print("iterator postfix ++");
+      auto result = *this;
+      pos -= 1;
+      return result;
+    }
+
+    int operator*() const {
+      int result = 2 - pos;
+      owner->print("iterator deref: %i", result);
+      return result;
+    }
+
+    size_t operator-(const Iterator &that) const {
+      int result = (2 - this->pos) - (2 - that.pos);
+      owner->print("iterator distance: %d", result);
+      return result;
+    }
+
+    Iterator operator+(int steps) const {
+      owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+      return Iterator(owner, pos - steps);
+    }
+
+    void print(const char *msg) const { owner->print(msg); }
+  };
+
+  Iterator begin() const {
+    print("begin()");
+    return Iterator(this, 2);
+  }
+
+  Iterator end() const {
+    print("end()");
+    return Iterator(this, -1);
+  }
+
+  void print(const char *msg, ...) const {
+    va_list args;
+    va_start(args, msg);
+    printf("[%s] ", name);
+    vprintf(msg, args);
+    printf("\n");
+    va_end(args);
+  }
+};
+
+int main() {
+  printf("do\n");
+#pragma omp fuse
+  {
+    for (Reporter a{"C"}; auto &&v : Reporter("A"))
+      printf("v=%d\n", v);
+    for (Reporter aa{"D"}; auto &&vv : Reporter("B"))
+      printf("vv=%d\n", vv);
+  }
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+// CHECK: [C] ctor
+// CHECK-NEXT: [A] ctor
+// CHECK-NEXT: [A] end()
+// CHECK-NEXT: [A] begin()
+// CHECK-NEXT: [A] begin()
+// CHECK-NEXT: [A] iterator distance: 3
+// CHECK-NEXT: [D] ctor
+// CHECK-NEXT: [B] ctor
+// CHECK-NEXT: [B] end()
+// CHECK-NEXT: [B] begin()
+// CHECK-NEXT: [B] begin()
+// CHECK-NEXT: [B] iterator distance: 3
+// CHECK-NEXT: [A] iterator advance: 0 += 0
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 0
+// CHECK-NEXT: v=0
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 0
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 0
+// CHECK-NEXT: vv=0
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator advance: 0 += 1
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 1
+// CHECK-NEXT: v=1
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 1
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 1
+// CHECK-NEXT: vv=1
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator advance: 0 += 2
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 2
+// CHECK-NEXT: v=2
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 2
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 2
+// CHECK-NEXT: vv=2
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] dtor
+// CHECK-NEXT: [D] dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] dtor
+// CHECK-NEXT: [C] dtor
+// CHECK-NEXT: done
+
+
+#endif
diff --git a/openmp/runtime/test/transform/fuse/intfor.c b/openmp/runtime/test/transform/fuse/intfor.c
new file mode 100644
index 0000000000000..b8171b4df7042
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/intfor.c
@@ -0,0 +1,50 @@
+// RUN: %libomp-compile-and-run  | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+  printf("do\n");
+#pragma omp fuse
+  {
+    for (int i = 5; i <= 25; i += 5)
+      printf("i=%d\n", i);
+    for (int j = 10; j < 100; j += 10)
+      printf("j=%d\n", j);
+    for (int k = 10; k > 0; --k)
+      printf("k=%d\n", k);
+  }
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: i=5
+// CHECK-NEXT: j=10
+// CHECK-NEXT: k=10
+// CHECK-NEXT: i=10
+// CHECK-NEXT: j=20
+// CHECK-NEXT: k=9
+// CHECK-NEXT: i=15
+// CHECK-NEXT: j=30
+// CHECK-NEXT: k=8
+// CHECK-NEXT: i=20
+// CHECK-NEXT: j=40
+// CHECK-NEXT: k=7
+// CHECK-NEXT: i=25
+// CHECK-NEXT: j=50
+// CHECK-NEXT: k=6
+// CHECK-NEXT: j=60
+// CHECK-NEXT: k=5
+// CHECK-NEXT: j=70
+// CHECK-NEXT: k=4
+// CHECK-NEXT: j=80
+// CHECK-NEXT: k=3
+// CHECK-NEXT: j=90
+// CHECK-NEXT: k=2
+// CHECK-NEXT: k=1
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/fuse/iterfor.cpp b/openmp/runtime/test/transform/fuse/iterfor.cpp
new file mode 100644
index 0000000000000..552484b2981c4
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/iterfor.cpp
@@ -0,0 +1,194 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdarg>
+#include <cstdio>
+#include <vector>
+
+struct Reporter {
+  const char *name;
+
+  Reporter(const char *name) : name(name) { print("ctor"); }
+
+  Reporter() : name("<anon>") { print("ctor"); }
+
+  Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+  Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+  ~Reporter() { print("dtor"); }
+
+  const Reporter &operator=(const Reporter &that) {
+    print("copy assign");
+    this->name = that.name;
+    return *this;
+  }
+
+  const Reporter &operator=(Reporter &&that) {
+    print("move assign");
+    this->name = that.name;
+    return *this;
+  }
+
+  struct Iterator {
+    const Reporter *owner;
+    int pos;
+
+    Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+    Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator copy ctor");
+    }
+
+    Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator move ctor");
+    }
+
+    ~Iterator() { owner->print("iterator dtor"); }
+
+    const Iterator &operator=(const Iterator &that) {
+      owner->print("iterator copy assign");
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    const Iterator &operator=(Iterator &&that) {
+      owner->print("iterator move assign");
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    bool operator==(const Iterator &that) const {
+      owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+      return this->pos == that.pos;
+    }
+
+    bool operator!=(const Iterator &that) const {
+      owner->print("iterator %d != %d", 2 - this->pos, 2 - that.pos);
+      return this->pos != that.pos;
+    }
+
+    Iterator &operator++() {
+      owner->print("iterator prefix ++");
+      pos -= 1;
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      owner->print("iterator postfix ++");
+      auto result = *this;
+      pos -= 1;
+      return result;
+    }
+
+    int operator*() const {
+      int result = 2 - pos;
+      owner->print("iterator deref: %i", result);
+      return result;
+    }
+
+    size_t operator-(const Iterator &that) const {
+      int result = (2 - this->pos) - (2 - that.pos);
+      owner->print("iterator distance: %d", result);
+      return result;
+    }
+
+    Iterator operator+(int steps) const {
+      owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+      return Iterator(owner, pos - steps);
+    }
+  };
+
+  Iterator begin() const {
+    print("begin()");
+    return Iterator(this, 2);
+  }
+
+  Iterator end() const {
+    print("end()");
+    return Iterator(this, -1);
+  }
+
+  void print(const char *msg, ...) const {
+    va_list args;
+    va_start(args, msg);
+    printf("[%s] ", name);
+    vprintf(msg, args);
+    printf("\n");
+    va_end(args);
+  }
+};
+
+int main() {
+  printf("do\n");
+  Reporter C("C");
+  Reporter D("D");
+#pragma omp fuse
+  {
+    for (auto it = C.begin(); it != C.end(); ++it)
+      printf("v=%d\n", *it);
+
+    for (auto it = D.begin(); it != D.end(); ++it)
+      printf("vv=%d\n", *it);
+  }
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK: [C] ctor
+// CHECK-NEXT: [D] ctor
+// CHECK-NEXT: [C] begin()
+// CHECK-NEXT: [C] begin()
+// CHECK-NEXT: [C] end()
+// CHECK-NEXT: [C] iterator distance: 3
+// CHECK-NEXT: [D] begin()
+// CHECK-NEXT: [D] begin()
+// CHECK-NEXT: [D] end()
+// CHECK-NEXT: [D] iterator distance: 3
+// CHECK-NEXT: [C] iterator advance: 0 += 0
+// CHECK-NEXT: [C] iterator move assign
+// CHECK-NEXT: [C] iterator deref: 0
+// CHECK-NEXT: v=0
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] iterator advance: 0 += 0
+// CHECK-NEXT: [D] iterator move assign
+// CHECK-NEXT: [D] iterator deref: 0
+// CHECK-NEXT: vv=0
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator advance: 0 += 1
+// CHECK-NEXT: [C] iterator move assign
+// CHECK-NEXT: [C] iterator deref: 1
+// CHECK-NEXT: v=1
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] iterator advance: 0 += 1
+// CHECK-NEXT: [D] iterator move assign
+// CHECK-NEXT: [D] iterator deref: 1
+// CHECK-NEXT: vv=1
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator advance: 0 += 2
+// CHECK-NEXT: [C] iterator move assign
+// CHECK-NEXT: [C] iterator deref: 2
+// CHECK-NEXT: v=2
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] iterator advance: 0 += 2
+// CHECK-NEXT: [D] iterator move assign
+// CHECK-NEXT: [D] iterator deref: 2
+// CHECK-NEXT: vv=2
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: done
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] dtor
+// CHECK-NEXT: [C] dtor
diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp
new file mode 100644
index 0000000000000..e9f76713fe3e0
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp
@@ -0,0 +1,208 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdarg>
+#include <cstdio>
+#include <vector>
+
+struct Reporter {
+  const char *name;
+
+  Reporter(const char *name) : name(name) { print("ctor"); }
+
+  Reporter() : name("<anon>") { print("ctor"); }
+
+  Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+  Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+  ~Reporter() { print("dtor"); }
+
+  const Reporter &operator=(const Reporter &that) {
+    print("copy assign");
+    this->name = that.name;
+    return *this;
+  }
+
+  const Reporter &operator=(Reporter &&that) {
+    print("move assign");
+    this->name = that.name;
+    return *this;
+  }
+
+  struct Iterator {
+    const Reporter *owner;
+    int pos;
+
+    Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+    Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator copy ctor");
+    }
+
+    Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator move ctor");
+    }
+
+    ~Iterator() { owner->print("iterator dtor"); }
+
+    const Iterator &operator=(const Iterator &that) {
+      owner->print("iterator copy assign");
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    const Iterator &operator=(Iterator &&that) {
+      owner->print("iterator move assign");
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    bool operator==(const Iterator &that) const {
+      owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+      return this->pos == that.pos;
+    }
+
+    Iterator &operator++() {
+      owner->print("iterator prefix ++");
+      pos -= 1;
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      owner->print("iterator postfix ++");
+      auto result = *this;
+      pos -= 1;
+      return result;
+    }
+
+    int operator*() const {
+      int result = 2 - pos;
+      owner->print("iterator deref: %i", result);
+      return result;
+    }
+
+    size_t operator-(const Iterator &that) const {
+      int result = (2 - this->pos) - (2 - that.pos);
+      owner->print("iterator distance: %d", result);
+      return result;
+    }
+
+    Iterator operator+(int steps) const {
+      owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+      return Iterator(owner, pos - steps);
+    }
+
+    void print(const char *msg) const { owner->print(msg); }
+  };
+
+  Iterator begin() const {
+    print("begin()");
+    return Iterator(this, 2);
+  }
+
+  Iterator end() const {
+    print("end()");
+    return Iterator(this, -1);
+  }
+
+  void print(const char *msg, ...) const {
+    va_list args;
+    va_start(args, msg);
+    printf("[%s] ", name);
+    vprintf(msg, args);
+    printf("\n");
+    va_end(args);
+  }
+};
+
+int main() {
+  printf("do\n");
+#pragma omp parallel for collapse(2) num_threads(1)
+  for (int i = 0; i < 3; ++i)
+#pragma omp fuse
+  {
+    for (Reporter c{"init-stmt"}; auto &&v : Reporter("range"))
+      printf("i=%d v=%d\n", i, v);
+    for (int vv = 0; vv < 3; ++vv)
+      printf("i=%d vv=%d\n", i, vv);
+  }
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: [init-stmt] ctor
+// CHECK-NEXT: [range] ctor
+// CHECK-NEXT: [range] end()
+// CHECK-NEXT: [range] begin()
+// CHECK-NEXT: [range] begin()
+// CHECK-NEXT: [range] iterator distance: 3
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: i=0 v=0
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=0 vv=0
+// CHECK-NEXT: [range] iterator advance: 0 += 1
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: i=0 v=1
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=0 vv=1
+// CHECK-NEXT: [range] iterator advance: 0 += 2
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: i=0 v=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=0 vv=2
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: i=1 v=0
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=1 vv=0
+// CHECK-NEXT: [range] iterator advance: 0 += 1
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: i=1 v=1
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=1 vv=1
+// CHECK-NEXT: [range] iterator advance: 0 += 2
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: i=1 v=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=1 vv=2
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: i=2 v=0
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=2 vv=0
+// CHECK-NEXT: [range] iterator advance: 0 += 1
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: i=2 v=1
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=2 vv=1
+// CHECK-NEXT: [range] iterator advance: 0 += 2
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: i=2 v=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=2 vv=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] dtor
+// CHECK-NEXT: [init-stmt] dtor
+// CHECK-NEXT: done
+
diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c
new file mode 100644
index 0000000000000..272908e72c429
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c
@@ -0,0 +1,45 @@
+// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdio>
+
+int main() {
+  printf("do\n");
+#pragma omp parallel for collapse(2) num_threads(1)
+  for (int i = 0; i < 3; ++i)
+#pragma omp fuse
+  {
+    for (int j = 0; j < 3; ++j)
+      printf("i=%d j=%d\n", i, j);
+    for (int k = 0; k < 3; ++k)
+      printf("i=%d k=%d\n", i, k);
+  }
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK:      do
+// CHECK: i=0 j=0
+// CHECK-NEXT: i=0 k=0
+// CHECK-NEXT: i=0 j=1
+// CHECK-NEXT: i=0 k=1
+// CHECK-NEXT: i=0 j=2
+// CHECK-NEXT: i=0 k=2
+// CHECK-NEXT: i=1 j=0
+// CHECK-NEXT: i=1 k=0
+// CHECK-NEXT: i=1 j=1
+// CHECK-NEXT: i=1 k=1
+// CHECK-NEXT: i=1 j=2
+// CHECK-NEXT: i=1 k=2
+// CHECK-NEXT: i=2 j=0
+// CHECK-NEXT: i=2 k=0
+// CHECK-NEXT: i=2 j=1
+// CHECK-NEXT: i=2 k=1
+// CHECK-NEXT: i=2 j=2
+// CHECK-NEXT: i=2 k=2
+// CHECK-NEXT: done

>From 7e3bd1e3afcdc246da0362ffb8693b160f9d3f4a Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:28:04 +0000
Subject: [PATCH 2/9] Add looprange clause

---
 clang/include/clang/AST/OpenMPClause.h        | 100 ++++++
 clang/include/clang/AST/RecursiveASTVisitor.h |   8 +
 clang/include/clang/AST/StmtOpenMP.h          |  18 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |   5 +
 clang/include/clang/Parse/Parser.h            |   3 +
 clang/include/clang/Sema/SemaOpenMP.h         |   6 +
 clang/lib/AST/OpenMPClause.cpp                |  35 ++
 clang/lib/AST/StmtOpenMP.cpp                  |   7 +-
 clang/lib/AST/StmtProfile.cpp                 |   7 +
 clang/lib/Basic/OpenMPKinds.cpp               |   2 +
 clang/lib/Parse/ParseOpenMP.cpp               |  36 ++
 clang/lib/Sema/SemaOpenMP.cpp                 | 155 +++++++--
 clang/lib/Sema/TreeTransform.h                |  33 ++
 clang/lib/Serialization/ASTReader.cpp         |  11 +
 clang/lib/Serialization/ASTReaderStmt.cpp     |   4 +-
 clang/lib/Serialization/ASTWriter.cpp         |   8 +
 clang/test/OpenMP/fuse_ast_print.cpp          |  67 ++++
 clang/test/OpenMP/fuse_codegen.cpp            | 320 +++++++++++++++++-
 clang/test/OpenMP/fuse_messages.cpp           | 112 +++++-
 clang/tools/libclang/CIndex.cpp               |   5 +
 llvm/include/llvm/Frontend/OpenMP/ClauseT.h   |  16 +-
 llvm/include/llvm/Frontend/OpenMP/OMP.td      |   6 +
 22 files changed, 921 insertions(+), 43 deletions(-)

diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index 6fd16bc0f03be..8f937cdef9cd0 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -1143,6 +1143,106 @@ class OMPFullClause final : public OMPNoChildClause<llvm::omp::OMPC_full> {
   static OMPFullClause *CreateEmpty(const ASTContext &C);
 };
 
+/// This class represents the 'looprange' clause in the
+/// '#pragma omp fuse' directive
+///
+/// \code{.c}
+/// #pragma omp fuse looprange(1,2)
+/// {
+///   for(int i = 0; i < 64; ++i)
+///   for(int j = 0; j < 256; j+=2)
+///   for(int k = 127; k >= 0; --k)
+/// \endcode
+class OMPLoopRangeClause final : public OMPClause {
+  friend class OMPClauseReader;
+
+  explicit OMPLoopRangeClause()
+      : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {}
+
+  /// Location of '('
+  SourceLocation LParenLoc;
+
+  /// Location of 'first'
+  SourceLocation FirstLoc;
+
+  /// Location of 'count'
+  SourceLocation CountLoc;
+
+  /// Expr associated with 'first' argument
+  Expr *First = nullptr;
+
+  /// Expr associated with 'count' argument
+  Expr *Count = nullptr;
+
+  /// Set 'first'
+  void setFirst(Expr *First) { this->First = First; }
+
+  /// Set 'count'
+  void setCount(Expr *Count) { this->Count = Count; }
+
+  /// Set location of '('.
+  void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
+  /// Set location of 'first' argument
+  void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; }
+
+  /// Set location of 'count' argument
+  void setCountLoc(SourceLocation Loc) { CountLoc = Loc; }
+
+public:
+  /// Build an AST node for a 'looprange' clause
+  ///
+  /// \param StartLoc     Starting location of the clause.
+  /// \param LParenLoc    Location of '('.
+  /// \param FirstLoc     Location of the 'first' argument.
+  /// \param CountLoc     Location of the 'count' argument.
+  static OMPLoopRangeClause *
+  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
+         SourceLocation FirstLoc, SourceLocation CountLoc,
+         SourceLocation EndLoc, Expr *First, Expr *Count);
+
+  /// Build an empty 'looprange' node for deserialization
+  ///
+  /// \param C      Context of the AST.
+  static OMPLoopRangeClause *CreateEmpty(const ASTContext &C);
+
+  /// Returns the location of '('
+  SourceLocation getLParenLoc() const { return LParenLoc; }
+
+  /// Returns the location of 'first'
+  SourceLocation getFirstLoc() const { return FirstLoc; }
+
+  /// Returns the location of 'count'
+  SourceLocation getCountLoc() const { return CountLoc; }
+
+  /// Returns the argument 'first' or nullptr if not set
+  Expr *getFirst() const { return cast_or_null<Expr>(First); }
+
+  /// Returns the argument 'count' or nullptr if not set
+  Expr *getCount() const { return cast_or_null<Expr>(Count); }
+
+  child_range children() {
+    return child_range(reinterpret_cast<Stmt **>(&First),
+                       reinterpret_cast<Stmt **>(&Count) + 1);
+  }
+
+  const_child_range children() const {
+    auto Children = const_cast<OMPLoopRangeClause *>(this)->children();
+    return const_child_range(Children.begin(), Children.end());
+  }
+
+  child_range used_children() {
+    return child_range(child_iterator(), child_iterator());
+  }
+  const_child_range used_children() const {
+    return const_child_range(const_child_iterator(), const_child_iterator());
+  }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == llvm::omp::OMPC_looprange;
+  }
+};
+
 /// Representation of the 'partial' clause of the '#pragma omp unroll'
 /// directive.
 ///
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 057e9e346ce4e..94066edc64933 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3400,6 +3400,14 @@ bool RecursiveASTVisitor<Derived>::VisitOMPFullClause(OMPFullClause *C) {
   return true;
 }
 
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPLoopRangeClause(
+    OMPLoopRangeClause *C) {
+  TRY_TO(TraverseStmt(C->getFirst()));
+  TRY_TO(TraverseStmt(C->getCount()));
+  return true;
+}
+
 template <typename Derived>
 bool RecursiveASTVisitor<Derived>::VisitOMPPartialClause(OMPPartialClause *C) {
   TRY_TO(TraverseStmt(C->getFactor()));
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index dc6f797e24ab8..85bde292ca748 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -5572,7 +5572,9 @@ class OMPTileDirective final : public OMPLoopTransformationDirective {
       : OMPLoopTransformationDirective(OMPTileDirectiveClass,
                                        llvm::omp::OMPD_tile, StartLoc, EndLoc,
                                        NumLoops) {
+    // Tiling doubles the original number of loops
     setNumGeneratedLoops(2 * NumLoops);
+    // Produces a single top-level canonical loop nest
     setNumGeneratedLoopNests(1);
   }
 
@@ -5803,9 +5805,9 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective {
       : OMPLoopTransformationDirective(OMPReverseDirectiveClass,
                                        llvm::omp::OMPD_reverse, StartLoc,
                                        EndLoc, 1) {
-
-    setNumGeneratedLoopNests(1);
+    // Reverse produces a single top-level canonical loop nest
     setNumGeneratedLoops(1);
+    setNumGeneratedLoopNests(1);
   }
 
   void setPreInits(Stmt *PreInits) {
@@ -5873,6 +5875,8 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
       : OMPLoopTransformationDirective(OMPInterchangeDirectiveClass,
                                        llvm::omp::OMPD_interchange, StartLoc,
                                        EndLoc, NumLoops) {
+    // Interchange produces a single top-level canonical loop
+    // nest with exactly the same total number of loops
     setNumGeneratedLoops(NumLoops);
     setNumGeneratedLoopNests(1);
   }
@@ -5950,11 +5954,7 @@ class OMPFuseDirective final : public OMPLoopTransformationDirective {
                             unsigned NumLoops)
       : OMPLoopTransformationDirective(OMPFuseDirectiveClass,
                                        llvm::omp::OMPD_fuse, StartLoc, EndLoc,
-                                       NumLoops) {
-    setNumGeneratedLoops(1);
-    // TODO: After implementing the looprange clause, change this logic
-    setNumGeneratedLoopNests(1);
-  }
+                                       NumLoops) {}
 
   void setPreInits(Stmt *PreInits) {
     Data->getChildren()[PreInitsOffset] = PreInits;
@@ -5990,8 +5990,10 @@ class OMPFuseDirective final : public OMPLoopTransformationDirective {
   /// \param C Context of the AST
   /// \param NumClauses Number of clauses to allocate
   /// \param NumLoops Number of associated loops to allocate
+  /// \param NumLoopNests Number of top-level loop nests that are generated
   static OMPFuseDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses,
-                                       unsigned NumLoops);
+                                       unsigned NumLoops,
+                                       unsigned NumLoopNests);
 
   /// Gets the associated loops after the transformation. This is the de-sugared
   /// replacement or nulltpr in dependent contexts.
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f31b6f8a3b26a..191618e7865dc 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11566,6 +11566,11 @@ def err_omp_not_a_loop_sequence : Error <
   "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">;
 def err_omp_empty_loop_sequence : Error <
   "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">;
+def err_omp_invalid_looprange : Error <
+  "loop range in '#pragma omp %0' exceeds the number of available loops: "
+  "range end '%1' is greater than the total number of loops '%2'">;
+def warn_omp_redundant_fusion : Warning <
+  "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">;
 def err_omp_not_for : Error<
   "%select{statement after '#pragma omp %1' must be a for loop|"
   "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">;
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index e6492b81dfff8..965dcb7da26d8 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -6739,6 +6739,9 @@ class Parser : public CodeCompletionHandler {
                                                 OpenMPClauseKind Kind,
                                                 bool ParseOnly);
 
+  /// Parses the 'looprange' clause of a '#pragma omp fuse' directive.
+  OMPClause *ParseOpenMPLoopRangeClause();
+  
   /// Parses the 'sizes' clause of a '#pragma omp tile' directive.
   OMPClause *ParseOpenMPSizesClause();
 
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index 8d78c2197c89d..f4a075e54cebe 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -921,6 +921,12 @@ class SemaOpenMP : public SemaBase {
                                        SourceLocation StartLoc,
                                        SourceLocation LParenLoc,
                                        SourceLocation EndLoc);
+
+  /// Called on well-formed 'looprange' clause after parsing its arguments.
+  OMPClause *
+  ActOnOpenMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc,
+                             SourceLocation LParenLoc, SourceLocation FirstLoc,
+                             SourceLocation CountLoc, SourceLocation EndLoc);
   /// Called on well-formed 'ordered' clause.
   OMPClause *
   ActOnOpenMPOrderedClause(SourceLocation StartLoc, SourceLocation EndLoc,
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 0e5052b944162..0b5808eb100e4 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -1024,6 +1024,26 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) {
   return new (C) OMPPartialClause();
 }
 
+OMPLoopRangeClause *
+OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc,
+                           SourceLocation LParenLoc, SourceLocation EndLoc,
+                           SourceLocation FirstLoc, SourceLocation CountLoc,
+                           Expr *First, Expr *Count) {
+  OMPLoopRangeClause *Clause = CreateEmpty(C);
+  Clause->setLocStart(StartLoc);
+  Clause->setLParenLoc(LParenLoc);
+  Clause->setLocEnd(EndLoc);
+  Clause->setFirstLoc(FirstLoc);
+  Clause->setCountLoc(CountLoc);
+  Clause->setFirst(First);
+  Clause->setCount(Count);
+  return Clause;
+}
+
+OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) {
+  return new (C) OMPLoopRangeClause();
+}
+
 OMPAllocateClause *OMPAllocateClause::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
     Expr *Allocator, Expr *Alignment, SourceLocation ColonLoc,
@@ -1888,6 +1908,21 @@ void OMPClausePrinter::VisitOMPPartialClause(OMPPartialClause *Node) {
   }
 }
 
+void OMPClausePrinter::VisitOMPLoopRangeClause(OMPLoopRangeClause *Node) {
+  OS << "looprange";
+
+  Expr *First = Node->getFirst();
+  Expr *Count = Node->getCount();
+
+  if (First && Count) {
+    OS << "(";
+    First->printPretty(OS, nullptr, Policy, 0);
+    OS << ",";
+    Count->printPretty(OS, nullptr, Policy, 0);
+    OS << ")";
+  }
+}
+
 void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) {
   OS << "allocator(";
   Node->getAllocator()->printPretty(OS, nullptr, Policy, 0);
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 4a6133766ef1c..06c987e7f1761 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -524,10 +524,13 @@ OMPFuseDirective *OMPFuseDirective::Create(
 
 OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C,
                                                 unsigned NumClauses,
-                                                unsigned NumLoops) {
-  return createEmptyDirective<OMPFuseDirective>(
+                                                unsigned NumLoops,
+                                                unsigned NumLoopNests) {
+  OMPFuseDirective *Dir = createEmptyDirective<OMPFuseDirective>(
       C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1,
       SourceLocation(), SourceLocation(), NumLoops);
+  Dir->setNumGeneratedLoopNests(NumLoopNests);
+  return Dir;
 }
 
 OMPForSimdDirective *
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 99d426db985e8..9f0ce076c35fa 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -511,6 +511,13 @@ void OMPClauseProfiler::VisitOMPPartialClause(const OMPPartialClause *C) {
     Profiler->VisitExpr(Factor);
 }
 
+void OMPClauseProfiler::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) {
+  if (const Expr *First = C->getFirst())
+    Profiler->VisitExpr(First);
+  if (const Expr *Count = C->getCount())
+    Profiler->VisitExpr(Count);
+}
+
 void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) {
   if (C->getAllocator())
     Profiler->VisitStmt(C->getAllocator());
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index d172450512f13..18330181f1509 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -248,6 +248,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str,
   case OMPC_affinity:
   case OMPC_when:
   case OMPC_append_args:
+  case OMPC_looprange:
     break;
   default:
     break;
@@ -583,6 +584,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind,
   case OMPC_affinity:
   case OMPC_when:
   case OMPC_append_args:
+  case OMPC_looprange:
     break;
   default:
     break;
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index cfffcdb01a514..ade5192d1968d 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -3041,6 +3041,39 @@ OMPClause *Parser::ParseOpenMPSizesClause() {
                                                  OpenLoc, CloseLoc);
 }
 
+OMPClause *Parser::ParseOpenMPLoopRangeClause() {
+  SourceLocation ClauseNameLoc = ConsumeToken();
+  SourceLocation FirstLoc, CountLoc;
+
+  BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
+  if (T.consumeOpen()) {
+    Diag(Tok, diag::err_expected) << tok::l_paren;
+    return nullptr;
+  }
+
+  FirstLoc = Tok.getLocation();
+  ExprResult FirstVal = ParseConstantExpression();
+  if (!FirstVal.isUsable()) {
+    T.skipToEnd();
+    return nullptr;
+  }
+
+  ExpectAndConsume(tok::comma);
+
+  CountLoc = Tok.getLocation();
+  ExprResult CountVal = ParseConstantExpression();
+  if (!CountVal.isUsable()) {
+    T.skipToEnd();
+    return nullptr;
+  }
+
+  T.consumeClose();
+
+  return Actions.OpenMP().ActOnOpenMPLoopRangeClause(
+      FirstVal.get(), CountVal.get(), ClauseNameLoc, T.getOpenLocation(),
+      FirstLoc, CountLoc, T.getCloseLocation());
+}
+
 OMPClause *Parser::ParseOpenMPPermutationClause() {
   SourceLocation ClauseNameLoc, OpenLoc, CloseLoc;
   SmallVector<Expr *> ArgExprs;
@@ -3469,6 +3502,9 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
     }
     Clause = ParseOpenMPClause(CKind, WrongDirective);
     break;
+  case OMPC_looprange:
+    Clause = ParseOpenMPLoopRangeClause();
+    break;  
   default:
     break;
   }
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index bd8bee64a9d2f..556b5cb43b6f8 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -14289,7 +14289,6 @@ bool SemaOpenMP::checkTransformableLoopSequence(
   // and tries to match the input AST to the canonical loop sequence grammar
   // structure
 
-  auto NLCV = NestedLoopCounterVisitor();
   // Helper functions to validate canonical loop sequence grammar is valid
   auto isLoopSequenceDerivation = [](auto *Child) {
     return isa<ForStmt>(Child) || isa<CXXForRangeStmt>(Child) ||
@@ -14392,7 +14391,7 @@ bool SemaOpenMP::checkTransformableLoopSequence(
 
   // Modularized code for handling regular canonical loops
   auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits,
-                            &LoopSeqSize, &NumLoops, Kind, &TmpDSA, &NLCV,
+                            &LoopSeqSize, &NumLoops, Kind, &TmpDSA,
                             this](Stmt *Child) {
     OriginalInits.emplace_back();
     LoopHelpers.emplace_back();
@@ -14405,8 +14404,11 @@ bool SemaOpenMP::checkTransformableLoopSequence(
           << getOpenMPDirectiveName(Kind);
       return false;
     }
+
     storeLoopStatements(Child);
-    NumLoops += NLCV.TraverseStmt(Child);
+    auto NLCV = NestedLoopCounterVisitor();
+    NLCV.TraverseStmt(Child);
+    NumLoops += NLCV.getNestedLoopCount();
     return true;
   };
 
@@ -15732,6 +15734,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
                                                 Stmt *AStmt,
                                                 SourceLocation StartLoc,
                                                 SourceLocation EndLoc) {
+
   ASTContext &Context = getASTContext();
   DeclContext *CurrContext = SemaRef.CurContext;
   Scope *CurScope = SemaRef.getCurScope();
@@ -15748,7 +15751,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
   SmallVector<SmallVector<Stmt *, 0>> OriginalInits;
 
   unsigned NumLoops;
-  // TODO: Support looprange clause using LoopSeqSize
   unsigned LoopSeqSize;
   if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops,
                                       LoopHelpers, LoopStmts, OriginalInits,
@@ -15757,10 +15759,67 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
   }
 
   // Defer transformation in dependent contexts
+  // The NumLoopNests argument is set to a placeholder (0)
+  // because a dependent context could prevent determining its true value
   if (CurrContext->isDependentContext()) {
     return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
-                                    NumLoops, 1, AStmt, nullptr, nullptr);
+                                    NumLoops, 0, AStmt, nullptr, nullptr);
   }
+
+  // Handle clauses, which can be any of the following: [looprange, apply]
+  const OMPLoopRangeClause *LRC =
+      OMPExecutableDirective::getSingleClause<OMPLoopRangeClause>(Clauses);
+
+  // The clause arguments are invalidated if any error arises
+  // such as non-constant or non-positive arguments
+  if (LRC && (!LRC->getFirst() || !LRC->getCount()))
+    return StmtError();
+
+  // Delayed semantic check of LoopRange constraint
+  // Evaluates the loop range arguments and returns the first and count values
+  auto EvaluateLoopRangeArguments = [&Context](Expr *First, Expr *Count,
+                                               uint64_t &FirstVal,
+                                               uint64_t &CountVal) {
+    llvm::APSInt FirstInt = First->EvaluateKnownConstInt(Context);
+    llvm::APSInt CountInt = Count->EvaluateKnownConstInt(Context);
+    FirstVal = FirstInt.getZExtValue();
+    CountVal = CountInt.getZExtValue();
+  };
+
+  // Checks if the loop range is valid
+  auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal,
+                           unsigned NumLoops) -> bool {
+    return FirstVal + CountVal - 1 <= NumLoops;
+  };
+  uint64_t FirstVal = 1, CountVal = 0, LastVal = LoopSeqSize;
+
+  if (LRC) {
+    EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal,
+                               CountVal);
+    if (CountVal == 1)
+      SemaRef.Diag(LRC->getCountLoc(), diag::warn_omp_redundant_fusion)
+          << getOpenMPDirectiveName(OMPD_fuse);
+
+    if (!ValidLoopRange(FirstVal, CountVal, LoopSeqSize)) {
+      SemaRef.Diag(LRC->getFirstLoc(), diag::err_omp_invalid_looprange)
+          << getOpenMPDirectiveName(OMPD_fuse) << (FirstVal + CountVal - 1)
+          << LoopSeqSize;
+      return StmtError();
+    }
+
+    LastVal = FirstVal + CountVal - 1;
+  }
+
+  // Complete fusion generates a single canonical loop nest
+  // However looprange clause generates several loop nests
+  unsigned NumLoopNests = LRC ? LoopSeqSize - CountVal + 1 : 1;
+
+  // Emit a warning for redundant loop fusion when the sequence contains only
+  // one loop.
+  if (LoopSeqSize == 1)
+    SemaRef.Diag(AStmt->getBeginLoc(), diag::warn_omp_redundant_fusion)
+        << getOpenMPDirectiveName(OMPD_fuse);
+
   assert(LoopHelpers.size() == LoopSeqSize &&
          "Expecting loop iteration space dimensionality to match number of "
          "affected loops");
@@ -15774,8 +15833,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
   SmallVector<Stmt *> PreInits;
 
   // Select the type with the largest bit width among all induction variables
-  QualType IVType = LoopHelpers[0].IterationVarRef->getType();
-  for (unsigned int I = 1; I < LoopSeqSize; ++I) {
+  QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType();
+  for (unsigned int I = FirstVal; I < LastVal; ++I) {
     QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType();
     if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) {
       IVType = CurrentIVType;
@@ -15824,20 +15883,21 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
 
   // Process each single loop to generate and collect declarations
   // and statements for all helper expressions
-  for (unsigned int I = 0; I < LoopSeqSize; ++I) {
+  for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
     addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
                     PreInits);
 
-    auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", I);
-    auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", I);
-    auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", I);
+    auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", J);
+    auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", J);
+    auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", J);
     auto [NIVD, NIDStmt] =
-        CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", I, true);
+        CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", J, true);
     auto [IVVD, IVDStmt] =
-        CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", I);
+        CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", J);
 
     if (!LBVD || !STVD || !NIVD || !IVVD)
-      return StmtError();
+      assert(LBVD && STVD && NIVD && IVVD &&
+             "OpenMP Fuse Helper variables creation failed");
 
     UBVarDecls.push_back(UBVD);
     LBVarDecls.push_back(LBVD);
@@ -15912,8 +15972,9 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
   //   omp.fuse.max = max(omp.temp1, omp.temp0)
 
   ExprResult MaxExpr;
-  for (unsigned I = 0; I < LoopSeqSize; ++I) {
-    DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[I]);
+  // I indexes the original loop sequence; J indexes the fused-range helpers
+  for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
+    DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[J]);
     QualType NITy = NIRef->getType();
 
     if (MaxExpr.isUnset()) {
@@ -15921,7 +15982,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
       MaxExpr = NIRef;
     } else {
       // Create a new acummulator variable t_i = MaxExpr
-      std::string TempName = (Twine(".omp.temp.") + Twine(I)).str();
+      std::string TempName = (Twine(".omp.temp.") + Twine(J)).str();
       VarDecl *TempDecl =
           buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr);
       TempDecl->setInit(MaxExpr.get());
@@ -15944,7 +16005,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
       if (!Comparison.isUsable())
         return StmtError();
 
-      DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[I]);
+      DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[J]);
       // Update MaxExpr using a conditional expression to hold the max value
       MaxExpr = new (Context) ConditionalOperator(
           Comparison.get(), SourceLocation(), TempRef2, SourceLocation(),
@@ -15997,23 +16058,21 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
 
   CompoundStmt *FusedBody = nullptr;
   SmallVector<Stmt *, 4> FusedBodyStmts;
-  for (unsigned I = 0; I < LoopSeqSize; ++I) {
-
+  for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
     // Assingment of the original sub-loop index to compute the logical index
     // IV_k = LB_k + omp.fuse.index * ST_k
-
     ExprResult IdxExpr =
         SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul,
-                           MakeVarDeclRef(STVarDecls[I]), MakeIVRef());
+                           MakeVarDeclRef(STVarDecls[J]), MakeIVRef());
     if (!IdxExpr.isUsable())
       return StmtError();
     IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add,
-                                 MakeVarDeclRef(LBVarDecls[I]), IdxExpr.get());
+                                 MakeVarDeclRef(LBVarDecls[J]), IdxExpr.get());
 
     if (!IdxExpr.isUsable())
       return StmtError();
     IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign,
-                                 MakeVarDeclRef(IVVarDecls[I]), IdxExpr.get());
+                                 MakeVarDeclRef(IVVarDecls[J]), IdxExpr.get());
     if (!IdxExpr.isUsable())
       return StmtError();
 
@@ -16028,7 +16087,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
     Stmt *Body = (isa<ForStmt>(LoopStmts[I]))
                      ? cast<ForStmt>(LoopStmts[I])->getBody()
                      : cast<CXXForRangeStmt>(LoopStmts[I])->getBody();
-
     BodyStmts.push_back(Body);
 
     CompoundStmt *CombinedBody =
@@ -16036,7 +16094,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
                              SourceLocation(), SourceLocation());
     ExprResult Condition =
         SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(),
-                           MakeVarDeclRef(NIVarDecls[I]));
+                           MakeVarDeclRef(NIVarDecls[J]));
 
     if (!Condition.isUsable())
       return StmtError();
@@ -16057,8 +16115,26 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
               FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(),
               IncrExpr.get()->getEndLoc());
 
+  // In the case of looprange, the result of fuse won't simply
+  // be a single loop (ForStmt), but rather a loop sequence
+  // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop
+  // and the post-fusion loops, preserving its original order.
+  Stmt *FusionStmt = FusedForStmt;
+  if (LRC) {
+    SmallVector<Stmt *, 4> FinalLoops;
+    // Gather all the pre-fusion loops
+    for (unsigned I = 0; I < FirstVal - 1; ++I)
+      FinalLoops.push_back(LoopStmts[I]);
+    // Gather the fused loop
+    FinalLoops.push_back(FusedForStmt);
+    // Gather all the post-fusion loops
+    for (unsigned I = FirstVal + CountVal - 1; I < LoopSeqSize; ++I)
+      FinalLoops.push_back(LoopStmts[I]);
+    FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(),
+                                      SourceLocation(), SourceLocation());
+  }
   return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops,
-                                  1, AStmt, FusedForStmt,
+                                  NumLoopNests, AStmt, FusionStmt,
                                   buildPreInits(Context, PreInits));
 }
 
@@ -17181,6 +17257,31 @@ OMPClause *SemaOpenMP::ActOnOpenMPPartialClause(Expr *FactorExpr,
                                   FactorExpr);
 }
 
+OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause(
+    Expr *First, Expr *Count, SourceLocation StartLoc, SourceLocation LParenLoc,
+    SourceLocation FirstLoc, SourceLocation CountLoc, SourceLocation EndLoc) {
+
+  // OpenMP [6.0, Restrictions]
+  // First and Count must be integer expressions with positive value
+  ExprResult FirstVal =
+      VerifyPositiveIntegerConstantInClause(First, OMPC_looprange);
+  if (FirstVal.isInvalid())
+    First = nullptr;
+
+  ExprResult CountVal =
+      VerifyPositiveIntegerConstantInClause(Count, OMPC_looprange);
+  if (CountVal.isInvalid())
+    Count = nullptr;
+
+  // OpenMP [6.0, Restrictions]
+  // first + count - 1 must not evaluate to a value greater than the
+  // loop sequence length of the associated canonical loop sequence.
+  // This check must be performed afterwards due to the delayed
+  // parsing and computation of the associated loop sequence
+  return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc,
+                                    EndLoc, FirstLoc, CountLoc, First, Count);
+}
+
 OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc,
                                               SourceLocation LParenLoc,
                                               SourceLocation EndLoc) {
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 034b0c8243667..d70e2a3874c07 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1775,6 +1775,14 @@ class TreeTransform {
                                                        LParenLoc, EndLoc);
   }
 
+  OMPClause *
+  RebuildOMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc,
+                            SourceLocation LParenLoc, SourceLocation FirstLoc,
+                            SourceLocation CountLoc, SourceLocation EndLoc) {
+    return getSema().OpenMP().ActOnOpenMPLoopRangeClause(
+        First, Count, StartLoc, LParenLoc, FirstLoc, CountLoc, EndLoc);
+  }
+
   /// Build a new OpenMP 'allocator' clause.
   ///
   /// By default, performs semantic analysis to build the new OpenMP clause.
@@ -10569,6 +10577,31 @@ TreeTransform<Derived>::TransformOMPPartialClause(OMPPartialClause *C) {
                                  C->getEndLoc());
 }
 
+template <typename Derived>
+OMPClause *
+TreeTransform<Derived>::TransformOMPLoopRangeClause(OMPLoopRangeClause *C) {
+  ExprResult F = getDerived().TransformExpr(C->getFirst());
+  if (F.isInvalid())
+    return nullptr;
+
+  ExprResult Cn = getDerived().TransformExpr(C->getCount());
+  if (Cn.isInvalid())
+    return nullptr;
+
+  Expr *First = F.get();
+  Expr *Count = Cn.get();
+
+  bool Changed = (First != C->getFirst()) || (Count != C->getCount());
+
+  // If no changes and AlwaysRebuild() is false, return the original clause
+  if (!Changed && !getDerived().AlwaysRebuild())
+    return C;
+
+  return RebuildOMPLoopRangeClause(First, Count, C->getBeginLoc(),
+                                   C->getLParenLoc(), C->getFirstLoc(),
+                                   C->getCountLoc(), C->getEndLoc());
+}
+
 template <typename Derived>
 OMPClause *
 TreeTransform<Derived>::TransformOMPCollapseClause(OMPCollapseClause *C) {
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index d068f5e163176..8591eb9394fa5 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -11088,6 +11088,9 @@ OMPClause *OMPClauseReader::readClause() {
   case llvm::omp::OMPC_partial:
     C = OMPPartialClause::CreateEmpty(Context);
     break;
+  case llvm::omp::OMPC_looprange:
+    C = OMPLoopRangeClause::CreateEmpty(Context);
+    break;
   case llvm::omp::OMPC_allocator:
     C = new (Context) OMPAllocatorClause();
     break;
@@ -11489,6 +11492,14 @@ void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) {
   C->setLParenLoc(Record.readSourceLocation());
 }
 
+void OMPClauseReader::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) {
+  C->setFirst(Record.readSubExpr());
+  C->setCount(Record.readSubExpr());
+  C->setLParenLoc(Record.readSourceLocation());
+  C->setFirstLoc(Record.readSourceLocation());
+  C->setCountLoc(Record.readSourceLocation());
+}
+
 void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
   C->setAllocator(Record.readExpr());
   C->setLParenLoc(Record.readSourceLocation());
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 6762d11d6b73e..a301e1c0b0e32 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -3621,7 +3621,9 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
     case STMT_OMP_FUSE_DIRECTIVE: {
       unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
       unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];
-      S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops);
+      unsigned NumLoopNests = Record[ASTStmtReader::NumStmtFields + 2];
+      S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops,
+                                        NumLoopNests);
       break;
     }
 
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 1b3d3c22aa9f5..8548f7e50d34b 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -7782,6 +7782,14 @@ void OMPClauseWriter::VisitOMPPartialClause(OMPPartialClause *C) {
   Record.AddSourceLocation(C->getLParenLoc());
 }
 
+void OMPClauseWriter::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) {
+  Record.AddStmt(C->getFirst());
+  Record.AddStmt(C->getCount());
+  Record.AddSourceLocation(C->getLParenLoc());
+  Record.AddSourceLocation(C->getFirstLoc());
+  Record.AddSourceLocation(C->getCountLoc());
+}
+
 void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
   Record.AddStmt(C->getAllocator());
   Record.AddSourceLocation(C->getLParenLoc());
diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp
index 43ce815dab024..ac4f0d38a9c68 100644
--- a/clang/test/OpenMP/fuse_ast_print.cpp
+++ b/clang/test/OpenMP/fuse_ast_print.cpp
@@ -271,6 +271,73 @@ void foo7() {
 
 }
 
+// PRINT-LABEL: void foo8(
+// DUMP-LABEL: FunctionDecl {{.*}} foo8
+void foo8() {
+    // PRINT: #pragma omp fuse looprange(2,2)
+    // DUMP:  OMPFuseDirective
+    // DUMP: OMPLooprangeClause
+    #pragma omp fuse looprange(2,2)
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: for (int i = 0; i < 10; i += 2)
+        // DUMP: ForStmt
+        for (int i = 0; i < 10; i += 2)
+            // PRINT: body(i)
+            // DUMP: CallExpr
+            body(i);
+        // PRINT: for (int j = 10; j > 0; --j)
+        // DUMP: ForStmt
+        for (int j = 10; j > 0; --j)
+            // PRINT: body(j)
+            // DUMP: CallExpr
+            body(j);
+        // PRINT: for (int k = 0; k <= 10; ++k)
+        // DUMP: ForStmt
+        for (int k = 0; k <= 10; ++k)
+            // PRINT: body(k)
+            // DUMP: CallExpr
+            body(k);
+
+    }
+
+}
+
+//PRINT-LABEL: void foo9(
+//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo9
+//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} F
+//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} C
+template<int F, int C> 
+void foo9() {
+    // PRINT:  #pragma omp fuse looprange(F,C)
+    // DUMP: OMPFuseDirective
+    // DUMP: OMPLooprangeClause
+    #pragma omp fuse looprange(F,C)
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: for (int i = 0; i < 10; i += 2)
+        // DUMP: ForStmt
+        for (int i = 0; i < 10; i += 2)
+            // PRINT: body(i)
+            // DUMP: CallExpr
+            body(i);
+        // PRINT: for (int j = 10; j > 0; --j)
+        // DUMP: ForStmt
+        for (int j = 10; j > 0; --j)
+            // PRINT: body(j)
+            // DUMP: CallExpr
+            body(j);
+
+    }
+}
+
+// Also test instantiating the template.
+void tfoo9() {
+    foo9<1, 2>();
+}
+
 
 
 
diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp
index 6c1e21092da43..d9500bed3ce31 100644
--- a/clang/test/OpenMP/fuse_codegen.cpp
+++ b/clang/test/OpenMP/fuse_codegen.cpp
@@ -53,6 +53,18 @@ extern "C" void foo3() {
     }
 }
 
+extern "C" void foo4() {
+    double arr[256];
+
+    #pragma omp fuse looprange(2,2)
+    {
+        for(int i = 0; i < 128; ++i) body(i);
+        for(int j = 0; j < 256; j+=2) body(j);
+        for(int k = 0; k < 64; ++k) body(k);
+        for(int c = 42; auto &&v: arr) body(c,v);
+    }
+}
+
 
 #endif
 // CHECK1-LABEL: define dso_local void @body(
@@ -777,6 +789,157 @@ extern "C" void foo3() {
 // CHECK1-NEXT:    ret void
 //
 //
+// CHECK1-LABEL: define dso_local void @foo4(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT:  [[ENTRY:.*:]]
+// CHECK1-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT:    store i32 127, ptr [[DOTOMP_UB0]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[K]], align 4
+// CHECK1-NEXT:    store i32 63, ptr [[DOTOMP_UB1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    store i32 64, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1:       [[COND_TRUE]]:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END:.*]]
+// CHECK1:       [[COND_FALSE]]:
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END]]
+// CHECK1:       [[COND_END]]:
+// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK1:       [[FOR_COND]]:
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128
+// CHECK1-NEXT:    br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1:       [[FOR_BODY]]:
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP6]])
+// CHECK1-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK1:       [[FOR_INC]]:
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[I]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK1:       [[FOR_END]]:
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND2:.*]]
+// CHECK1:       [[FOR_COND2]]:
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT:    br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]]
+// CHECK1:       [[FOR_BODY4]]:
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// CHECK1-NEXT:    br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1:       [[IF_THEN]]:
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2
+// CHECK1-NEXT:    [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK1-NEXT:    store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP16]])
+// CHECK1-NEXT:    br label %[[IF_END]]
+// CHECK1:       [[IF_END]]:
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+// CHECK1-NEXT:    br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]]
+// CHECK1:       [[IF_THEN9]]:
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]]
+// CHECK1-NEXT:    store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1
+// CHECK1-NEXT:    [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK1-NEXT:    store i32 [[ADD13]], ptr [[K]], align 4
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP23]])
+// CHECK1-NEXT:    br label %[[IF_END14]]
+// CHECK1:       [[IF_END14]]:
+// CHECK1-NEXT:    br label %[[FOR_INC15:.*]]
+// CHECK1:       [[FOR_INC15]]:
+// CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[INC16:%.*]] = add nsw i32 [[TMP24]], 1
+// CHECK1-NEXT:    store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND2]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK1:       [[FOR_END17]]:
+// CHECK1-NEXT:    store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT:    store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0
+// CHECK1-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256
+// CHECK1-NEXT:    store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND19:.*]]
+// CHECK1:       [[FOR_COND19]]:
+// CHECK1-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT:    [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]]
+// CHECK1-NEXT:    br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]]
+// CHECK1:       [[FOR_BODY21]]:
+// CHECK1-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    store ptr [[TMP29]], ptr [[V]], align 8
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT:    [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP30]], double noundef [[TMP32]])
+// CHECK1-NEXT:    br label %[[FOR_INC22:.*]]
+// CHECK1:       [[FOR_INC22]]:
+// CHECK1-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1
+// CHECK1-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND19]]
+// CHECK1:       [[FOR_END23]]:
+// CHECK1-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define dso_local void @body(
 // CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] {
 // CHECK2-NEXT:  [[ENTRY:.*:]]
@@ -1259,6 +1422,157 @@ extern "C" void foo3() {
 // CHECK2-NEXT:    ret void
 //
 //
+// CHECK2-LABEL: define dso_local void @foo4(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT:  [[ENTRY:.*:]]
+// CHECK2-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT:    store i32 127, ptr [[DOTOMP_UB0]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[K]], align 4
+// CHECK2-NEXT:    store i32 63, ptr [[DOTOMP_UB1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    store i32 64, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2:       [[COND_TRUE]]:
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END:.*]]
+// CHECK2:       [[COND_FALSE]]:
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END]]
+// CHECK2:       [[COND_END]]:
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK2:       [[FOR_COND]]:
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128
+// CHECK2-NEXT:    br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2:       [[FOR_BODY]]:
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP6]])
+// CHECK2-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK2:       [[FOR_INC]]:
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[I]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK2:       [[FOR_END]]:
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND2:.*]]
+// CHECK2:       [[FOR_COND2]]:
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
+// CHECK2-NEXT:    br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]]
+// CHECK2:       [[FOR_BODY4]]:
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// CHECK2-NEXT:    br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2:       [[IF_THEN]]:
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2
+// CHECK2-NEXT:    [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK2-NEXT:    store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP16]])
+// CHECK2-NEXT:    br label %[[IF_END]]
+// CHECK2:       [[IF_END]]:
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+// CHECK2-NEXT:    br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]]
+// CHECK2:       [[IF_THEN9]]:
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
+// CHECK2-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]]
+// CHECK2-NEXT:    store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1
+// CHECK2-NEXT:    [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK2-NEXT:    store i32 [[ADD13]], ptr [[K]], align 4
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP23]])
+// CHECK2-NEXT:    br label %[[IF_END14]]
+// CHECK2:       [[IF_END14]]:
+// CHECK2-NEXT:    br label %[[FOR_INC15:.*]]
+// CHECK2:       [[FOR_INC15]]:
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[INC16:%.*]] = add nsw i32 [[TMP24]], 1
+// CHECK2-NEXT:    store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND2]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK2:       [[FOR_END17]]:
+// CHECK2-NEXT:    store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT:    store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0
+// CHECK2-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256
+// CHECK2-NEXT:    store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND19:.*]]
+// CHECK2:       [[FOR_COND19]]:
+// CHECK2-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT:    [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]]
+// CHECK2-NEXT:    br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]]
+// CHECK2:       [[FOR_BODY21]]:
+// CHECK2-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    store ptr [[TMP29]], ptr [[V]], align 8
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT:    [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP30]], double noundef [[TMP32]])
+// CHECK2-NEXT:    br label %[[FOR_INC22:.*]]
+// CHECK2:       [[FOR_INC22]]:
+// CHECK2-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1
+// CHECK2-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND19]]
+// CHECK2:       [[FOR_END23]]:
+// CHECK2-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define dso_local void @tfoo2(
 // CHECK2-SAME: ) #[[ATTR0]] {
 // CHECK2-NEXT:  [[ENTRY:.*:]]
@@ -1494,7 +1808,7 @@ extern "C" void foo3() {
 // CHECK2-NEXT:    [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
 // CHECK2-NEXT:    [[INC:%.*]] = add i32 [[TMP71]], 1
 // CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
 // CHECK2:       [[FOR_END]]:
 // CHECK2-NEXT:    ret void
 //
@@ -1503,9 +1817,13 @@ extern "C" void foo3() {
 // CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"}
 // CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
 // CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
+// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
 //.
 // CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
 // CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"}
 // CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
 // CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
+// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
 //.
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp
index 50dedfd2c0dc6..2a2491d008a0b 100644
--- a/clang/test/OpenMP/fuse_messages.cpp
+++ b/clang/test/OpenMP/fuse_messages.cpp
@@ -33,6 +33,8 @@ void func() {
     {
         for (int i = 0; i < 7; ++i)
             ;
+        for(int j = 0; j < 100; ++j);
+
     }
 
 
@@ -41,6 +43,8 @@ void func() {
     {
         for (int i = 0; i < 7; ++i)
             ;
+        for(int j = 0; j < 100; ++j);
+
     }
 
     //expected-error@+4 {{loop after '#pragma omp fuse' is not in canonical form}}
@@ -50,6 +54,7 @@ void func() {
         for(int i = 0; i < 10; i*=2) {
             ;
         }
+        for(int j = 0; j < 100; ++j);
     }
 
     //expected-error@+2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}}
@@ -73,4 +78,109 @@ void func() {
         for(unsigned int j = 0; j < 10; ++j);
         for(long long k = 0; k < 100; ++k);
     }
-}
\ No newline at end of file
+
+    //expected-warning@+2 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}}
+    #pragma omp fuse
+    {
+        for(int i = 0; i < 10; ++i);
+    }
+
+    //expected-warning@+1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}}
+    #pragma omp fuse looprange(1, 1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+    }
+
+    //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+    #pragma omp fuse looprange(1, -1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+    }
+
+    //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+    #pragma omp fuse looprange(1, 0)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+    }
+
+    const int x = 1;
+    constexpr int y = 4;
+    //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than the total number of loops '3'}}
+    #pragma omp fuse looprange(x,y)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+
+    //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '420' is greater than the total number of loops '3'}}
+    #pragma omp fuse looprange(1,420)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+}
+
+// In a template context, but expression itself not instantiation-dependent
+template <typename T>
+static void templated_func() {
+
+    //expected-warning@+1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}}
+    #pragma omp fuse looprange(2,1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+
+    //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '5' is greater than the total number of loops '3'}}
+    #pragma omp fuse looprange(3,3)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+
+}
+
+template <int V> 
+static void templated_func_value_dependent() {
+
+    //expected-warning@+1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}}
+    #pragma omp fuse looprange(V,1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+}
+
+template <typename T> 
+static void templated_func_type_dependent() {
+    constexpr T s = 1;
+
+    //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+    #pragma omp fuse looprange(s,s-1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+}
+
+
+void template_inst() {
+    // expected-note@+1 {{in instantiation of function template specialization 'templated_func<int>' requested here}}
+    templated_func<int>();
+    // expected-note@+1 {{in instantiation of function template specialization 'templated_func_value_dependent<1>' requested here}}
+    templated_func_value_dependent<1>();
+    // expected-note@+1 {{in instantiation of function template specialization 'templated_func_type_dependent<int>' requested here}}
+    templated_func_type_dependent<int>();
+
+}
+
+
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index fd788ac3d69d4..38f5183b146ee 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2412,6 +2412,11 @@ void OMPClauseEnqueue::VisitOMPPartialClause(const OMPPartialClause *C) {
   Visitor->AddStmt(C->getFactor());
 }
 
+void OMPClauseEnqueue::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) {
+  Visitor->AddStmt(C->getFirst());
+  Visitor->AddStmt(C->getCount());
+}
+
 void OMPClauseEnqueue::VisitOMPAllocatorClause(const OMPAllocatorClause *C) {
   Visitor->AddStmt(C->getAllocator());
 }
diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
index e0714e812e5cd..dd51274c1aaf5 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
@@ -1233,6 +1233,15 @@ struct WriteT {
   using EmptyTrait = std::true_type;
 };
 
+// V6: [6.4.7] Looprange clause
+template <typename T, typename I, typename E> struct LoopRangeT {
+  using Begin = E;
+  using End = E;
+
+  using TupleTrait = std::true_type;
+  std::tuple<Begin, End> t;
+};
+
 // ---
 
 template <typename T, typename I, typename E>
@@ -1263,9 +1272,10 @@ using TupleClausesT =
                  DefaultmapT<T, I, E>, DeviceT<T, I, E>, DistScheduleT<T, I, E>,
                  DoacrossT<T, I, E>, FromT<T, I, E>, GrainsizeT<T, I, E>,
                  IfT<T, I, E>, InitT<T, I, E>, InReductionT<T, I, E>,
-                 LastprivateT<T, I, E>, LinearT<T, I, E>, MapT<T, I, E>,
-                 NumTasksT<T, I, E>, OrderT<T, I, E>, ReductionT<T, I, E>,
-                 ScheduleT<T, I, E>, TaskReductionT<T, I, E>, ToT<T, I, E>>;
+                 LastprivateT<T, I, E>, LinearT<T, I, E>, LoopRangeT<T, I, E>,
+                 MapT<T, I, E>, NumTasksT<T, I, E>, OrderT<T, I, E>,
+                 ReductionT<T, I, E>, ScheduleT<T, I, E>,
+                 TaskReductionT<T, I, E>, ToT<T, I, E>>;
 
 template <typename T, typename I, typename E>
 using UnionClausesT = std::variant<DependT<T, I, E>>;
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 8286cfcadaafd..ae19385c022d0 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -271,6 +271,9 @@ def OMPC_Linear : Clause<"linear"> {
 def OMPC_Link : Clause<"link"> {
   let flangClass = "OmpObjectList";
 }
+def OMPC_LoopRange : Clause<"looprange"> {
+  let clangClass = "OMPLoopRangeClause";
+}
 def OMPC_Map : Clause<"map"> {
   let clangClass = "OMPMapClause";
   let flangClass = "OmpMapClause";
@@ -853,6 +856,9 @@ def OMP_For : Directive<"for"> {
   let languages = [L_C];
 }
 def OMP_Fuse : Directive<"fuse"> {
+  let allowedOnceClauses = [
+    VersionedClause<OMPC_LoopRange, 60>
+  ];
   let association = AS_Loop;
   let category = CA_Executable;
 }

>From c1e5fc3fe2ac7f126a76b44906b30029e3cc797b Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:30:39 +0000
Subject: [PATCH 3/9] Added fuse to documentation

---
 clang/docs/OpenMPSupport.rst | 2 ++
 clang/docs/ReleaseNotes.rst  | 1 +
 2 files changed, 3 insertions(+)

diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index d6507071d4693..5f0e363792b32 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -376,6 +376,8 @@ implementation.
 +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
 | loop stripe transformation                                  | :good:`done`              | https://github.com/llvm/llvm-project/pull/119891                                                     |
 +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| loop fuse transformation                                    | :good:`done`              | :none:`unclaimed`        |                                                                          |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
 | work distribute construct                                   | :none:`unclaimed`         | :none:`unclaimed`         |                                                                          |
 +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
 | task_iteration                                              | :none:`unclaimed`         | :none:`unclaimed`         |                                                                          |
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 573ae97bff710..2188e42dc705c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1016,6 +1016,7 @@ OpenMP Support
   open parenthesis. (#GH139665)
 - An error is now emitted when OpenMP ``collapse`` and ``ordered`` clauses have
   an argument larger than what can fit within a 64-bit integer.
+- Added support for 'omp fuse' directive.
 
 Improvements
 ^^^^^^^^^^^^

>From 33119f77c07cc3ecbb5b3360fd8f63a958e808c1 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:43:41 +0000
Subject: [PATCH 4/9] Refactored preinits handling and improved coverage

---
 clang/docs/OpenMPSupport.rst          |    2 +-
 clang/include/clang/AST/StmtOpenMP.h  |    5 +-
 clang/include/clang/Sema/SemaOpenMP.h |   96 +-
 clang/lib/AST/StmtOpenMP.cpp          |   13 +
 clang/lib/Basic/OpenMPKinds.cpp       |    3 +-
 clang/lib/CodeGen/CGExpr.cpp          |    2 +
 clang/lib/CodeGen/CodeGenFunction.h   |    4 +
 clang/lib/Sema/SemaOpenMP.cpp         |  588 ++++---
 clang/test/OpenMP/fuse_ast_print.cpp  |   55 +
 clang/test/OpenMP/fuse_codegen.cpp    | 2117 +++++++++++++++----------
 10 files changed, 1862 insertions(+), 1023 deletions(-)

diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index 5f0e363792b32..b39f9d3634a63 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -376,7 +376,7 @@ implementation.
 +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
 | loop stripe transformation                                  | :good:`done`              | https://github.com/llvm/llvm-project/pull/119891                                                     |
 +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
-| loop fuse transformation                                    | :good:`done`              | :none:`unclaimed`        |                                                                          |
+| loop fuse transformation                                    | :good:`prototyped`        | :none:`unclaimed`         |                                                                          |
 +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
 | work distribute construct                                   | :none:`unclaimed`         | :none:`unclaimed`         |                                                                          |
 +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 85bde292ca748..b6a948a8c6020 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -1005,8 +1005,7 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
     Stmt::StmtClass C = T->getStmtClass();
     return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass ||
            C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass ||
-           C == OMPStripeDirectiveClass ||
-           C == OMPFuseDirectiveClass;
+           C == OMPStripeDirectiveClass || C == OMPFuseDirectiveClass;
   }
 };
 
@@ -5653,6 +5652,8 @@ class OMPStripeDirective final : public OMPLoopTransformationDirective {
                                        llvm::omp::OMPD_stripe, StartLoc, EndLoc,
                                        NumLoops) {
     setNumGeneratedLoops(2 * NumLoops);
+    // Similar to Tile, it only generates a single top level loop nest
+    setNumGeneratedLoopNests(1);
   }
 
   void setPreInits(Stmt *PreInits) {
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index f4a075e54cebe..ac4cbe3709a0d 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -1493,16 +1493,96 @@ class SemaOpenMP : public SemaBase {
       SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
       Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits);
 
-  /// Analyzes and checks a loop sequence for use by a loop transformation
+  /// @brief Categories of loops encountered during semantic OpenMP loop
+  /// analysis
+  ///
+  /// This enumeration identifies the structural category of a loop or sequence
+  /// of loops analyzed in the context of OpenMP transformations and directives.
+  /// This categorization helps differentiate between original source loops
+  /// and the structures resulting from applying OpenMP loop transformations.
+  enum class OMPLoopCategory {
+
+    /// @var OMPLoopCategory::RegularLoop
+    /// Represents a standard canonical loop nest found in the
+    /// original source code or an intact loop after transformations
+    /// (i.e. Post/Pre loops of a loopranged fusion)
+    RegularLoop,
+
+    /// @var OMPLoopCategory::TransformSingleLoop
+    /// Represents the resulting loop structure when an OpenMP loop
+    /// transformation generates a single, top-level loop
+    TransformSingleLoop,
+
+    /// @var OMPLoopCategory::TransformLoopSequence
+    /// Represents the resulting loop structure when an OpenMP loop
+    /// transformation generates a sequence of two or more canonical loop
+    /// nests
+    TransformLoopSequence
+  };
+
+  /// The main recursive process of `checkTransformableLoopSequence` that
+  /// performs grammatical parsing of a canonical loop sequence. It extracts
+  /// key information, such as the number of top-level loops, loop statements,
+  /// helper expressions, and other relevant loop-related data, all in a single
+  /// execution to avoid redundant traversals. This analysis flattens inner
+  /// loop sequences.
+  ///
+  /// \param LoopSeqStmt    The AST of the original statement.
+  /// \param LoopSeqSize    [out] Number of top level canonical loops.
+  /// \param NumLoops       [out] Number of total canonical loops (nested too).
+  /// \param LoopHelpers    [out] The multiple loop analyses results.
+  /// \param ForStmts       [out] The multiple Stmt of each For loop.
+  /// \param OriginalInits  [out] The raw original initialization statements
+  ///                       of each loop belonging to the loop sequence
+  /// \param TransformsPreInits [out] The multiple collection of statements and
+  ///                       declarations that must have been executed/declared
+  ///                       before entering the loop (each belonging to a
+  ///                       particular loop transformation, nullptr otherwise)
+  /// \param LoopSequencePreInits [out] Additional general collection of loop
+  ///                       transformation related statements and declarations
+  ///                       not bound to a particular loop that must be
+  ///                       executed before entering the loop transformation
+  /// \param LoopCategories [out] A sequence of OMPLoopCategory values,
+  ///                       one for each loop or loop transformation node
+  ///                       successfully analyzed.
+  /// \param Context        The AST context, passed on to recursive calls.
+  /// \param Kind           The loop transformation directive kind.
+  /// \return Whether the original statement is both syntactically and
+  /// semantically correct according to OpenMP 6.0 canonical loop
+  /// sequence definition.
+  bool analyzeLoopSequence(
+      Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops,
+      SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+      SmallVectorImpl<Stmt *> &ForStmts,
+      SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
+      SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
+      SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+      SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context,
+      OpenMPDirectiveKind Kind);
+
+  /// Validates and checks whether a loop sequence can be transformed according
+  /// to the given directive, providing necessary setup and initialization
+  /// (Driver function) before recursion using `analyzeLoopSequence`.
   ///
   /// \param Kind           The loop transformation directive kind.
-  /// \param NumLoops       [out] Number of total canonical loops
-  /// \param LoopSeqSize    [out] Number of top level canonical loops
+  /// \param AStmt          The AST of the original statement
+  /// \param LoopSeqSize    [out] Number of top level canonical loops.
+  /// \param NumLoops       [out] Number of total canonical loops (nested too)
   /// \param LoopHelpers    [out] The multiple loop analyses results.
-  /// \param LoopStmts      [out] The multiple Stmt of each For loop.
-  /// \param OriginalInits  [out] The multiple collection of statements and
+  /// \param ForStmts       [out] The multiple Stmt of each For loop.
+  /// \param OriginalInits  [out] The raw original initialization statements
+  ///                       of each loop belonging to the loop sequence
+  /// \param TransformsPreInits [out] The multiple collection of statements and
   ///                       declarations that must have been executed/declared
-  ///                       before entering the loop.
+  ///                       before entering the loop (each belonging to a
+  ///                       particular loop transformation, nullptr otherwise)
+  /// \param LoopSequencePreInits [out] Additional general collection of loop
+  ///                       transformation related statements and declarations
+  ///                       not bound to a particular loop that must be
+  ///                       executed before entering the loop transformation
+  /// \param LoopCategories [out] A sequence of OMPLoopCategory values,
+  ///                       one for each loop or loop transformation node
+  ///                       successfully analyzed.
   /// \param Context
   /// \return Whether there was an absence of errors or not
   bool checkTransformableLoopSequence(
@@ -1511,7 +1591,9 @@ class SemaOpenMP : public SemaBase {
       SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
       SmallVectorImpl<Stmt *> &ForStmts,
       SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
-      ASTContext &Context);
+      SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
+      SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+      SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context);
 
   /// Helper to keep information about the current `omp begin/end declare
   /// variant` nesting.
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 06c987e7f1761..e6b52792885ba 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -457,6 +457,8 @@ OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc,
       C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc);
   Dir->setNumGeneratedLoops(NumGeneratedLoops);
   // The number of generated loops and loop nests during unroll matches
+  // given that unroll only generates top level canonical loop nests
+  // so each generated loop is a top level canonical loop nest
   Dir->setNumGeneratedLoopNests(NumGeneratedLoops);
   Dir->setTransformedStmt(TransformedStmt);
   Dir->setPreInits(PreInits);
@@ -517,6 +519,17 @@ OMPFuseDirective *OMPFuseDirective::Create(
       NumLoops);
   Dir->setTransformedStmt(TransformedStmt);
   Dir->setPreInits(PreInits);
+  // The number of top level canonical loop nests may
+  // not match the total number of generated loops
+  // Example:
+  // Before fusion:
+  //   for (int i = 0; i < N; ++i)   
+  //     for (int j = 0; j < M; ++j) 
+  //       A[i][j] = i + j;
+  //   
+  //   for (int k = 0; k < P; ++k) 
+  //     B[k] = k * 2;
+  // Here, NumLoopNests = 2, but NumLoops = 3.
   Dir->setNumGeneratedLoopNests(NumLoopNests);
   Dir->setNumGeneratedLoops(NumLoops);
   return Dir;
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index 18330181f1509..53a9f80e6d3b7 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -704,7 +704,8 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
 
 bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) {
   return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse ||
-         DKind == OMPD_interchange || DKind == OMPD_stripe || DKind == OMPD_fuse;
+         DKind == OMPD_interchange || DKind == OMPD_stripe ||
+         DKind == OMPD_fuse;
 }
 
 bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) {
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 7cb7ee20fcf6a..1671f07bc2760 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3242,6 +3242,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
 
     // No other cases for now.
     } else {
+      llvm::dbgs() << "THE DAMN DECLREFEXPR HASN'T BEEN ENTERED IN LOCALDECLMAP\n";
+      VD->dumpColor();
       llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?");
     }
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index a983901f560de..ce00198c396b6 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -5414,6 +5414,10 @@ class CodeGenFunction : public CodeGenTypeCache {
 
   /// Set the address of a local variable.
   void setAddrOfLocalVar(const VarDecl *VD, Address Addr) {
+    if (LocalDeclMap.count(VD)) {
+      llvm::errs() << "Warning: VarDecl already exists in map: ";
+      VD->dumpColor(); 
+    }
     assert(!LocalDeclMap.count(VD) && "Decl already exists in LocalDeclMap!");
     LocalDeclMap.insert({VD, Addr});
   }
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 556b5cb43b6f8..b0529c9352c83 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -22,6 +22,7 @@
 #include "clang/AST/DeclOpenMP.h"
 #include "clang/AST/DynamicRecursiveASTVisitor.h"
 #include "clang/AST/OpenMPClause.h"
+#include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/StmtCXX.h"
 #include "clang/AST/StmtOpenMP.h"
 #include "clang/AST/StmtVisitor.h"
@@ -47,6 +48,7 @@
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/IR/Assumptions.h"
 #include <optional>
+#include <queue>
 
 using namespace clang;
 using namespace llvm::omp;
@@ -14157,6 +14159,45 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective(
       getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
 }
 
+// Recursion terminator: no candidate types are left, so nothing matched.
+template <typename T, typename F>
+static bool tryHandleAs(T *, F &&) {
+  return false;
+}
+
+/// Tries to cast `t` to each candidate type in turn and invokes `f` on the
+/// first successful cast.
+///
+/// \tparam Class The candidate type checked by this instantiation.
+/// \tparam Rest  The remaining candidate types, checked next in order.
+/// \tparam T     The base type of `t`.
+/// \tparam F     The callable type invoked upon a successful cast.
+/// \param t      The object to be checked.
+/// \param f      The function to invoke with the downcast pointer.
+/// \return `true` if `t` matched any candidate type and `f` was called,
+/// otherwise `false`.
+template <typename Class, typename... Rest, typename T, typename F>
+static bool tryHandleAs(T *t, F &&f) {
+  Class *Casted = dyn_cast<Class>(t);
+  if (!Casted)
+    return tryHandleAs<Rest...>(t, std::forward<F>(f));
+
+  f(Casted);
+  return true;
+}
+
+// Appends the pre-inits of the loop transformation directive Transform to
+// PreInits.back(); hits llvm_unreachable for any other directive kind.
+static void updatePreInits(OMPLoopBasedDirective *Transform,
+                           SmallVectorImpl<SmallVector<Stmt *, 0>> &PreInits) {
+  if (!tryHandleAs<OMPTileDirective, OMPStripeDirective, OMPUnrollDirective,
+                   OMPReverseDirective, OMPInterchangeDirective,
+                   OMPFuseDirective>(Transform, [&PreInits](auto *Dir) {
+        appendFlattenedStmtList(PreInits.back(), Dir->getPreInits());
+      }))
+    llvm_unreachable("Unhandled loop transformation");
+}
+
 bool SemaOpenMP::checkTransformableLoopNest(
     OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
     SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
@@ -14187,121 +14228,106 @@ bool SemaOpenMP::checkTransformableLoopNest(
         return false;
       },
       [&OriginalInits](OMPLoopBasedDirective *Transform) {
-        Stmt *DependentPreInits;
-        if (auto *Dir = dyn_cast<OMPTileDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else if (auto *Dir = dyn_cast<OMPStripeDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else if (auto *Dir = dyn_cast<OMPReverseDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else if (auto *Dir = dyn_cast<OMPFuseDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else
-          llvm_unreachable("Unhandled loop transformation");
-
-        appendFlattenedStmtList(OriginalInits.back(), DependentPreInits);
+        updatePreInits(Transform, OriginalInits);
       });
   assert(OriginalInits.back().empty() && "No preinit after innermost loop");
   OriginalInits.pop_back();
   return Result;
 }
 
-class NestedLoopCounterVisitor
-    : public clang::RecursiveASTVisitor<NestedLoopCounterVisitor> {
+// Counts the total number of nested loops, including the outermost loop (the
+// original loop). PRECONDITION of this visitor is that it must be invoked from
+// the original loop to be analyzed. The traversal is stopped at Decls and
+// Exprs, given that they may contain inner loops that must not be counted.
+//
+// Example AST structure for the code:
+//
+// int main() {
+//     #pragma omp fuse
+//     {
+//         for (int i = 0; i < 100; i++) {    <-- Outer loop
+//             []() {
+//                 for(int j = 0; j < 100; j++) {}  <-- NOT A LOOP
+//             };
+//             for(int j = 0; j < 5; ++j) {}    <-- Inner loop
+//         }
+//         for (int r = 0; r < 100; r++) {    <-- Outer loop
+//             struct LocalClass {
+//                 void bar() {
+//                     for(int j = 0; j < 100; j++) {}  <-- NOT A LOOP
+//                 }
+//             };
+//             for(int k = 0; k < 10; ++k) {}    <-- Inner loop
+//             ({x = 5; for(k = 0; k < 10; ++k) x += k; x;}); <-- NOT A LOOP
+//         }
+//     }
+// }
+// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops
+class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor {
+private:
+  unsigned NestedLoopCount = 0;
+
 public:
-  explicit NestedLoopCounterVisitor() : NestedLoopCount(0) {}
+  explicit NestedLoopCounterVisitor() {}
 
-  bool VisitForStmt(clang::ForStmt *FS) {
-    ++NestedLoopCount;
-    return true;
+  unsigned getNestedLoopCount() const { return NestedLoopCount; }
+
+  bool VisitForStmt(ForStmt *FS) override {
+        ++NestedLoopCount;
+        return true;
   }
 
-  bool VisitCXXForRangeStmt(clang::CXXForRangeStmt *FRS) {
-    ++NestedLoopCount;
-    return true;
+  bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override {
+        ++NestedLoopCount;
+        return true;
   }
 
-  unsigned getNestedLoopCount() const { return NestedLoopCount; }
+  bool TraverseStmt(Stmt *S) override {
+        if (!S)
+      return true;
 
-private:
-  unsigned NestedLoopCount;
+        // Skip traversal of all expressions, including special cases like
+        // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions
+        // may contain inner statements (and even loops), but they are not part
+        // of the syntactic body of the surrounding loop structure.
+        //  Therefore must not be counted
+        if (isa<Expr>(S))
+      return true;
+
+        // Only recurse into CompoundStmt (block {}) and loop bodies
+        if (isa<CompoundStmt>(S) || isa<ForStmt>(S) ||
+            isa<CXXForRangeStmt>(S)) {
+      return DynamicRecursiveASTVisitor::TraverseStmt(S);
+        }
+
+        // Stop traversal of the rest of statements, that break perfect
+        // loop nesting, such as control flow (IfStmt, SwitchStmt...)
+        return true;
+  }
+
+  bool TraverseDecl(Decl *D) override {
+        // Stop in the case of finding a declaration, it is not important
+        // in order to find nested loops (Possible CXXRecordDecl, RecordDecl,
+        // FunctionDecl...)
+        return true;
+  }
 };
 
-bool SemaOpenMP::checkTransformableLoopSequence(
-    OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize,
-    unsigned &NumLoops,
+bool SemaOpenMP::analyzeLoopSequence(
+    Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops,
     SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
     SmallVectorImpl<Stmt *> &ForStmts,
     SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
-    ASTContext &Context) {
+    SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
+    SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+    SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context,
+    OpenMPDirectiveKind Kind) {
 
-  // Checks whether the given statement is a compound statement
   VarsWithInheritedDSAType TmpDSA;
-  if (!isa<CompoundStmt>(AStmt)) {
-    Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
-        << getOpenMPDirectiveName(Kind);
-    return false;
-  }
-  // Callback for updating pre-inits in case there are even more
-  // loop-sequence-generating-constructs inside of the main compound stmt
-  auto OnTransformationCallback =
-      [&OriginalInits](OMPLoopBasedDirective *Transform) {
-        Stmt *DependentPreInits;
-        if (auto *Dir = dyn_cast<OMPTileDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else if (auto *Dir = dyn_cast<OMPReverseDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else if (auto *Dir = dyn_cast<OMPFuseDirective>(Transform))
-          DependentPreInits = Dir->getPreInits();
-        else
-          llvm_unreachable("Unhandled loop transformation");
-
-        appendFlattenedStmtList(OriginalInits.back(), DependentPreInits);
-      };
-
-  // Number of top level canonical loop nests observed (And acts as index)
-  LoopSeqSize = 0;
-  // Number of total observed loops
-  NumLoops = 0;
-
-  // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows
-  // the grammar:
-  //
-  // canonical-loop-sequence:
-  //  {
-  //    loop-sequence+
-  //  }
-  // where loop-sequence can be any of the following:
-  // 1. canonical-loop-sequence
-  // 2. loop-nest
-  // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective)
-  //
-  // To recognise and traverse this structure the following helper functions
-  // have been defined. handleLoopSequence serves as the recurisve entry point
-  // and tries to match the input AST to the canonical loop sequence grammar
-  // structure
-
-  // Helper functions to validate canonical loop sequence grammar is valid
-  auto isLoopSequenceDerivation = [](auto *Child) {
-    return isa<ForStmt>(Child) || isa<CXXForRangeStmt>(Child) ||
-           isa<OMPLoopTransformationDirective>(Child);
-  };
-  auto isLoopGeneratingStmt = [](auto *Child) {
-    return isa<OMPLoopTransformationDirective>(Child);
-  };
-
+  QualType BaseInductionVarType;
   // Helper Lambda to handle storing initialization and body statements for both
   // ForStmt and CXXForRangeStmt and checks for any possible mismatch between
   // induction variables types
-  QualType BaseInductionVarType;
   auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType,
                               this, &Context](Stmt *LoopStmt) {
     if (auto *For = dyn_cast<ForStmt>(LoopStmt)) {
@@ -14324,33 +14350,35 @@ bool SemaOpenMP::checkTransformableLoopSequence(
           }
         }
       }
-
     } else {
-      assert(isa<CXXForRangeStmt>(LoopStmt) &&
-             "Expected canonical for or range-based for loops.");
-      auto *CXXFor = dyn_cast<CXXForRangeStmt>(LoopStmt);
+      auto *CXXFor = cast<CXXForRangeStmt>(LoopStmt);
       OriginalInits.back().push_back(CXXFor->getBeginStmt());
       ForStmts.push_back(CXXFor);
     }
   };
+
   // Helper lambda functions to encapsulate the processing of different
   // derivations of the canonical loop sequence grammar
   //
   // Modularized code for handling loop generation and transformations
-  auto handleLoopGeneration = [&storeLoopStatements, &LoopHelpers,
-                               &OriginalInits, &LoopSeqSize, &NumLoops, Kind,
-                               &TmpDSA, &OnTransformationCallback,
-                               this](Stmt *Child) {
+  auto analyzeLoopGeneration = [&storeLoopStatements, &LoopHelpers,
+                                &OriginalInits, &TransformsPreInits,
+                                &LoopCategories, &LoopSeqSize, &NumLoops, Kind,
+                                &TmpDSA, &ForStmts, &Context,
+                                &LoopSequencePreInits, this](Stmt *Child) {
     auto LoopTransform = dyn_cast<OMPLoopTransformationDirective>(Child);
     Stmt *TransformedStmt = LoopTransform->getTransformedStmt();
     unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests();
-
+    unsigned NumGeneratedLoops = LoopTransform->getNumGeneratedLoops();
     // Handle the case where transformed statement is not available due to
     // dependent contexts
     if (!TransformedStmt) {
-      if (NumGeneratedLoopNests > 0)
+      if (NumGeneratedLoopNests > 0) {
+        LoopSeqSize += NumGeneratedLoopNests;
+        NumLoops += NumGeneratedLoops;
         return true;
-      // Unroll full
+      }
+      // Unroll full (0 loops produced)
       else {
         Diag(Child->getBeginLoc(), diag::err_omp_not_for)
             << 0 << getOpenMPDirectiveName(Kind);
@@ -14363,38 +14391,56 @@ bool SemaOpenMP::checkTransformableLoopSequence(
       Diag(Child->getBeginLoc(), diag::err_omp_not_for)
           << 0 << getOpenMPDirectiveName(Kind);
       return false;
-      // Future loop transformations that generate multiple canonical loops
-    } else if (NumGeneratedLoopNests > 1) {
-      llvm_unreachable("Multiple canonical loop generating transformations "
-                       "like loop splitting are not yet supported");
     }
+    // Loop transformations such as split or loopranged fuse
+    else if (NumGeneratedLoopNests > 1) {
+      // Get the preinits related to this loop sequence generating
+      // loop transformation (i.e loopranged fuse, split...)
+      LoopSequencePreInits.emplace_back();
+      // These preinits differ slightly from regular inits/pre-inits related
+      // to single loop generating loop transformations (interchange, unroll)
+      // given that they are not bounded to a particular loop nest
+      // so they need to be treated independently
+      updatePreInits(LoopTransform, LoopSequencePreInits);
+      return analyzeLoopSequence(TransformedStmt, LoopSeqSize, NumLoops,
+                                 LoopHelpers, ForStmts, OriginalInits,
+                                 TransformsPreInits, LoopSequencePreInits,
+                                 LoopCategories, Context, Kind);
+    }
+    // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all)
+    else {
+      // Process the transformed loop statement
+      OriginalInits.emplace_back();
+      TransformsPreInits.emplace_back();
+      LoopHelpers.emplace_back();
+      LoopCategories.push_back(OMPLoopCategory::TransformSingleLoop);
+
+      unsigned IsCanonical =
+          checkOpenMPLoop(Kind, nullptr, nullptr, TransformedStmt, SemaRef,
+                          *DSAStack, TmpDSA, LoopHelpers[LoopSeqSize]);
+
+      if (!IsCanonical) {
+        Diag(TransformedStmt->getBeginLoc(), diag::err_omp_not_canonical_loop)
+            << getOpenMPDirectiveName(Kind);
+        return false;
+      }
+      storeLoopStatements(TransformedStmt);
+      updatePreInits(LoopTransform, TransformsPreInits);
 
-    // Process the transformed loop statement
-    Child = TransformedStmt;
-    OriginalInits.emplace_back();
-    LoopHelpers.emplace_back();
-    OnTransformationCallback(LoopTransform);
-
-    unsigned IsCanonical =
-        checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack,
-                        TmpDSA, LoopHelpers[LoopSeqSize]);
-
-    if (!IsCanonical) {
-      Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop)
-          << getOpenMPDirectiveName(Kind);
-      return false;
+      NumLoops += NumGeneratedLoops;
+      ++LoopSeqSize;
+      return true;
     }
-    storeLoopStatements(TransformedStmt);
-    NumLoops += LoopTransform->getNumGeneratedLoops();
-    return true;
   };
 
   // Modularized code for handling regular canonical loops
-  auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits,
-                            &LoopSeqSize, &NumLoops, Kind, &TmpDSA,
-                            this](Stmt *Child) {
+  auto analyzeRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits,
+                             &LoopSeqSize, &NumLoops, Kind, &TmpDSA,
+                             &LoopCategories, this](Stmt *Child) {
     OriginalInits.emplace_back();
     LoopHelpers.emplace_back();
+    LoopCategories.push_back(OMPLoopCategory::RegularLoop);
+
     unsigned IsCanonical =
         checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack,
                         TmpDSA, LoopHelpers[LoopSeqSize]);
@@ -14412,57 +14458,114 @@ bool SemaOpenMP::checkTransformableLoopSequence(
     return true;
   };
 
-  // Helper function to process a Loop Sequence Recursively
-  auto handleLoopSequence = [&](Stmt *LoopSeqStmt,
-                                auto &handleLoopSequenceCallback) -> bool {
-    for (auto *Child : LoopSeqStmt->children()) {
-      if (!Child)
-        continue;
+  // Helper functions to validate the canonical loop sequence grammar
+  auto isLoopSequenceDerivation = [](auto *Child) {
+    return isa<ForStmt>(Child) || isa<CXXForRangeStmt>(Child) ||
+           isa<OMPLoopTransformationDirective>(Child);
+  };
+  auto isLoopGeneratingStmt = [](auto *Child) {
+    return isa<OMPLoopTransformationDirective>(Child);
+  };
+
 
-      // Skip over non-loop-sequence statements
-      if (!isLoopSequenceDerivation(Child)) {
-        Child = Child->IgnoreContainers();
+  // High level grammar validation
+  for (auto *Child : LoopSeqStmt->children()) {
 
-        // Ignore empty compound statement
         if (!Child)
-          continue;
+      continue;
 
-        // In the case of a nested loop sequence ignoring containers would not
-        // be enough, a recurisve transversal of the loop sequence is required
-        if (isa<CompoundStmt>(Child)) {
-          if (!handleLoopSequenceCallback(Child, handleLoopSequenceCallback))
-            return false;
-          // Already been treated, skip this children
-          continue;
+        // Skip over non-loop-sequence statements
+        if (!isLoopSequenceDerivation(Child)) {
+      Child = Child->IgnoreContainers();
+
+      // Ignore empty compound statement
+      if (!Child)
+        continue;
+
+      // In the case of a nested loop sequence, ignoring containers would not
+      // be enough; a recursive traversal of the loop sequence is required
+      if (isa<CompoundStmt>(Child)) {
+        if (!analyzeLoopSequence(Child, LoopSeqSize, NumLoops, LoopHelpers,
+                                 ForStmts, OriginalInits, TransformsPreInits,
+                                 LoopSequencePreInits, LoopCategories, Context,
+                                 Kind))
+          return false;
+        // Already handled by the recursive call; skip this child
+        continue;
+      }
+        }
+        // Regular loop sequence handling
+        if (isLoopSequenceDerivation(Child)) {
+      if (isLoopGeneratingStmt(Child)) {
+        if (!analyzeLoopGeneration(Child)) {
+          return false;
         }
+        // analyzeLoopGeneration updates Loop Sequence size accordingly
+
+      } else {
+        if (!analyzeRegularLoop(Child)) {
+          return false;
+        }
+        // Update the Loop Sequence size by one
+        ++LoopSeqSize;
       }
-      // Regular loop sequence handling
-      if (isLoopSequenceDerivation(Child)) {
-        if (isLoopGeneratingStmt(Child)) {
-          if (!handleLoopGeneration(Child)) {
-            return false;
-          }
         } else {
-          if (!handleRegularLoop(Child)) {
-            return false;
-          }
+      // Report error for invalid statement inside canonical loop sequence
+      Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+          << 0 << getOpenMPDirectiveName(Kind);
+      return false;
         }
-        ++LoopSeqSize;
-      } else {
-        // Report error for invalid statement inside canonical loop sequence
-        Diag(Child->getBeginLoc(), diag::err_omp_not_for)
-            << 0 << getOpenMPDirectiveName(Kind);
+  }
+  return true;
+}
+
+bool SemaOpenMP::checkTransformableLoopSequence(
+    OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize,
+    unsigned &NumLoops,
+    SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+    SmallVectorImpl<Stmt *> &ForStmts,
+    SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
+    SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
+    SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+    SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context) {
+
+  // Checks whether the given statement is a compound statement
+  if (!isa<CompoundStmt>(AStmt)) {
+        Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
+            << getOpenMPDirectiveName(Kind);
         return false;
-      }
-    }
-    return true;
-  };
+  }
+  // Number of top level canonical loop nests observed (And acts as index)
+  LoopSeqSize = 0;
+  // Number of total observed loops
+  NumLoops = 0;
+
+  // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows
+  // the grammar:
+  //
+  // canonical-loop-sequence:
+  //  {
+  //    loop-sequence+
+  //  }
+  // where loop-sequence can be any of the following:
+  // 1. canonical-loop-sequence
+  // 2. loop-nest
+  // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective)
+  //
+  // To recognise and traverse this structure the following helper functions
+  // have been defined. analyzeLoopSequence serves as the recursive entry point
+  // and tries to match the input AST to the canonical loop sequence grammar
+  // structure. This function will perform both a semantic and syntactical
+  // analysis of the given statement according to OpenMP 6.0 definition of
+  // the aforementioned canonical loop sequence
 
   // Recursive entry point to process the main loop sequence
-  if (!handleLoopSequence(AStmt, handleLoopSequence)) {
-    return false;
+  if (!analyzeLoopSequence(AStmt, LoopSeqSize, NumLoops, LoopHelpers, ForStmts,
+                           OriginalInits, TransformsPreInits,
+                           LoopSequencePreInits, LoopCategories, Context,
+                           Kind)) {
+        return false;
   }
-
   if (LoopSeqSize <= 0) {
     Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence)
         << getOpenMPDirectiveName(Kind);
@@ -14494,9 +14597,7 @@ static void addLoopPreInits(ASTContext &Context,
                                               RangeEnd->getBeginLoc(),
                                               RangeEnd->getEndLoc()));
   }
-
   llvm::append_range(PreInits, OriginalInit);
-
   // List of OMPCapturedExprDecl, for __begin, __end, and NumIterations
   if (auto *PI = cast_or_null<DeclStmt>(LoopHelper.PreInits)) {
     PreInits.push_back(new (Context) DeclStmt(
@@ -15177,7 +15278,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
   Stmt *LoopStmt = nullptr;
   collectLoopStmts(AStmt, {LoopStmt});
 
-  // Determine the PreInit declarations.
+  // Determine the PreInit declarations.
   SmallVector<Stmt *, 4> PreInits;
   addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits);
 
@@ -15744,28 +15845,35 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
   if (!AStmt) {
     return StmtError();
   }
+
+  unsigned NumLoops = 1;
+  unsigned LoopSeqSize = 1;
+
+  // Defer transformation in dependent contexts
+  // The NumLoopNests argument is set to a placeholder 1 (even though
+  // using looprange fuse could yield up to 3 top level loop nests)
+  // because a dependent context could prevent determining its true value
+  if (CurrContext->isDependentContext()) {
+    return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
+                                    NumLoops, LoopSeqSize, AStmt, nullptr,
+                                    nullptr);
+  }
+
   // Validate that the potential loop sequence is transformable for fusion
   // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops
   SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers;
   SmallVector<Stmt *> LoopStmts;
   SmallVector<SmallVector<Stmt *, 0>> OriginalInits;
-
-  unsigned NumLoops;
-  unsigned LoopSeqSize;
+  SmallVector<SmallVector<Stmt *, 0>> TransformsPreInits;
+  SmallVector<SmallVector<Stmt *, 0>> LoopSequencePreInits;
+  SmallVector<OMPLoopCategory, 0> LoopCategories;
   if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops,
                                       LoopHelpers, LoopStmts, OriginalInits,
-                                      Context)) {
+                                      TransformsPreInits, LoopSequencePreInits,
+                                      LoopCategories, Context)) {
     return StmtError();
   }
 
-  // Defer transformation in dependent contexts
-  // The NumLoopNests argument is set to a placeholder (0)
-  // because a dependent context could prevent determining its true value
-  if (CurrContext->isDependentContext()) {
-    return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
-                                    NumLoops, 0, AStmt, nullptr, nullptr);
-  }
-
   // Handle clauses, which can be any of the following: [looprange, apply]
   const OMPLoopRangeClause *LRC =
       OMPExecutableDirective::getSingleClause<OMPLoopRangeClause>(Clauses);
@@ -15827,11 +15935,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
          "Expecting loop iteration space dimensionality to match number of "
          "affected loops");
 
-  // PreInits hold a sequence of variable declarations that must be executed
-  // before the fused loop begins. These include bounds, strides, and other
-  // helper variables required for the transformation.
-  SmallVector<Stmt *> PreInits;
-
   // Select the type with the largest bit width among all induction variables
   QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType();
   for (unsigned int I = FirstVal; I < LastVal; ++I) {
@@ -15843,7 +15946,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
   uint64_t IVBitWidth = Context.getIntWidth(IVType);
 
   // Create pre-init declarations for all loops lower bounds, upper bounds,
-  // strides and num-iterations
+  // strides and num-iterations for every top level loop in the fusion
   SmallVector<VarDecl *, 4> LBVarDecls;
   SmallVector<VarDecl *, 4> STVarDecls;
   SmallVector<VarDecl *, 4> NIVarDecls;
@@ -15881,12 +15984,62 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
         return std::make_pair(VD, DeclStmt);
       };
 
+  // PreInits hold a sequence of variable declarations that must be executed
+  // before the fused loop begins. These include bounds, strides, and other
+  // helper variables required for the transformation. Other loop transforms
+  // also contain their own preinits
+  SmallVector<Stmt *> PreInits;
+  // Iterator to keep track of loop transformations
+  unsigned int TransformIndex = 0;
+
+  //  Update the general preinits using the preinits generated by loop sequence
+  //  generating loop transformations. These preinits differ slightly from
+  //  single-loop transformation preinits, as they can be detached from a
+  //  specific loop inside the multiple generated loop nests. This happens
+  //  because certain helper variables, like '.omp.fuse.max', are introduced to
+  //  handle fused iteration spaces and may not be directly tied to a single
+  //  original loop. The preinit structure must ensure that hidden variables
+  //  like '.omp.fuse.max' are still properly handled.
+  // Transformations that apply this concept: Loopranged Fuse, Split
+  if (!LoopSequencePreInits.empty()) {
+    for (const auto &LTPreInits : LoopSequencePreInits) {
+      if (!LTPreInits.empty()) {
+        llvm::append_range(PreInits, LTPreInits);
+      }
+    }
+  }
+
   // Process each single loop to generate and collect declarations
-  // and statements for all helper expressions
+  // and statements for all helper expressions related to
+  // particular single loop nests
+
+  // Also, in the case of the fused loops, we keep track of their original
+  // inits by appending them to their preinits statement, and in the case of
+  // transformations, also append their preinits (which contain the original
+  // loop initialization statement or other statements)
+
+  // First, we need to update TransformIndex to match the beginning of the
+  // looprange section
+  for (unsigned int I = 0; I < FirstVal - 1; ++I) {
+    if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop)
+      ++TransformIndex;
+  }
   for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
-    addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
-                    PreInits);
 
+    if (LoopCategories[I] == OMPLoopCategory::RegularLoop) {
+      addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
+                      PreInits);
+    } else if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) {
+      // For transformed loops, insert both pre-inits and original inits.
+      // Order matters: pre-inits may define variables used in the original
+      // inits such as upper bounds...
+      auto TransformPreInit = TransformsPreInits[TransformIndex++];
+      if (!TransformPreInit.empty()) {
+        llvm::append_range(PreInits, TransformPreInit);
+      }
+      addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
+                      PreInits);
+    }
     auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", J);
     auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", J);
     auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", J);
@@ -15905,7 +16058,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
     NIVarDecls.push_back(NIVD);
     IVVarDecls.push_back(IVVD);
 
-    PreInits.push_back(UBDStmt.get());
     PreInits.push_back(LBDStmt.get());
     PreInits.push_back(STDStmt.get());
     PreInits.push_back(NIDStmt.get());
@@ -16081,6 +16233,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
     BodyStmts.push_back(IdxExpr.get());
     llvm::append_range(BodyStmts, LoopHelpers[I].Updates);
 
+    // If the loop is a CXXForRangeStmt then the iterator variable is needed
     if (auto *SourceCXXFor = dyn_cast<CXXForRangeStmt>(LoopStmts[I]))
       BodyStmts.push_back(SourceCXXFor->getLoopVarStmt());
 
@@ -16115,21 +16268,50 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
               FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(),
               IncrExpr.get()->getEndLoc());
 
-  // In the case of looprange, the result of fuse won't simply
-  // be a single loop (ForStmt), but rather a loop sequence
-  // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop
-  // and the post-fusion loops, preserving its original order.
+  //  In the case of looprange, the result of fuse won't simply
+  //  be a single loop (ForStmt), but rather a loop sequence
+  //  (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop
+  //  and the post-fusion loops, preserving its original order.
+  //
+  //  Note: If looprange clause produces a single fused loop nest then
+  //  this compound statement wrapper is unnecessary (Therefore this
+  //  treatment is skipped)
+
   Stmt *FusionStmt = FusedForStmt;
-  if (LRC) {
+  if (LRC && CountVal != LoopSeqSize) {
     SmallVector<Stmt *, 4> FinalLoops;
-    // Gather all the pre-fusion loops
-    for (unsigned I = 0; I < FirstVal - 1; ++I)
-      FinalLoops.push_back(LoopStmts[I]);
-    // Gather the fused loop
-    FinalLoops.push_back(FusedForStmt);
-    // Gather all the post-fusion loops
-    for (unsigned I = FirstVal + CountVal - 1; I < LoopSeqSize; ++I)
+    // Reset the transform index
+    TransformIndex = 0;
+
+    // Collect all non-fused loops before and after the fused region.
+    // Pre-fusion and post-fusion loops are inserted in order exploiting their
+    // symmetry, along with their corresponding transformation pre-inits if
+    // needed. The fused loop is added between the two regions.
+    for (unsigned I = 0; I < LoopSeqSize; ++I) {
+      if (I >= FirstVal - 1 && I < FirstVal + CountVal - 1) {
+        // Update the Transformation counter to skip already treated
+        // loop transformations
+        if (LoopCategories[I] != OMPLoopCategory::TransformSingleLoop)
+          ++TransformIndex;
+        continue;
+      }
+
+      // No need to handle:
+      // Regular loops: they are kept intact as-is.
+      // Loop-sequence-generating transformations: already handled earlier.
+      // Only TransformSingleLoop requires inserting pre-inits here
+
+      if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) {
+        auto TransformPreInit = TransformsPreInits[TransformIndex++];
+        if (!TransformPreInit.empty()) {
+          llvm::append_range(PreInits, TransformPreInit);
+        }
+      }
+
       FinalLoops.push_back(LoopStmts[I]);
+    }
+
+    FinalLoops.insert(FinalLoops.begin() + (FirstVal - 1), FusedForStmt);
     FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(),
                                       SourceLocation(), SourceLocation());
   }
diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp
index ac4f0d38a9c68..9d85bd1172948 100644
--- a/clang/test/OpenMP/fuse_ast_print.cpp
+++ b/clang/test/OpenMP/fuse_ast_print.cpp
@@ -338,6 +338,61 @@ void tfoo9() {
     foo9<1, 2>();
 }
 
+// PRINT-LABEL: void foo10(
+// DUMP-LABEL: FunctionDecl {{.*}} foo10
+void foo10() {
+    // PRINT: #pragma omp fuse looprange(2,2)
+    // DUMP:  OMPFuseDirective
+    // DUMP: OMPLooprangeClause
+    #pragma omp fuse looprange(2,2)
+    // PRINT: {
+    // DUMP: CompoundStmt       
+    {
+        // PRINT: for (int i = 0; i < 10; i += 2)
+        // DUMP: ForStmt
+        for (int i = 0; i < 10; i += 2)
+            // PRINT: body(i)
+            // DUMP: CallExpr
+            body(i);
+        // PRINT: for (int ii = 0; ii < 10; ii += 2)
+        // DUMP: ForStmt
+        for (int ii = 0; ii < 10; ii += 2)
+            // PRINT: body(ii)
+            // DUMP: CallExpr
+            body(ii);
+        // PRINT: #pragma omp fuse looprange(2,2)
+        // DUMP:  OMPFuseDirective
+        // DUMP: OMPLooprangeClause
+        #pragma omp fuse looprange(2,2)
+        {
+            // PRINT: for (int j = 10; j > 0; --j)
+            // DUMP: ForStmt
+            for (int j = 10; j > 0; --j)
+                // PRINT: body(j)
+                // DUMP: CallExpr
+                body(j);
+            // PRINT: for (int jj = 10; jj > 0; --jj)
+            // DUMP: ForStmt
+            for (int jj = 10; jj > 0; --jj)
+                // PRINT: body(jj)
+                // DUMP: CallExpr
+                body(jj);
+            // PRINT: for (int k = 0; k <= 10; ++k)
+            // DUMP: ForStmt
+            for (int k = 0; k <= 10; ++k)
+                // PRINT: body(k)
+                // DUMP: CallExpr
+                body(k);
+            // PRINT: for (int kk = 0; kk <= 10; ++kk)
+            // DUMP: ForStmt
+            for (int kk = 0; kk <= 10; ++kk)
+                // PRINT: body(kk)
+                // DUMP: CallExpr
+                body(kk);
+        }
+    }
+
+}
 
 
 
diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp
index d9500bed3ce31..742c280ed0172 100644
--- a/clang/test/OpenMP/fuse_codegen.cpp
+++ b/clang/test/OpenMP/fuse_codegen.cpp
@@ -65,6 +65,23 @@ extern "C" void foo4() {
     }
 }
 
+// This exemplifies the usage of loop transformations that generate more
+// than one top-level canonical loop nest (e.g. split, loopranged fuse...)
+extern "C" void foo5() {
+    double arr[256];
+    #pragma omp fuse looprange(2,2)
+    {
+        #pragma omp fuse looprange(2,2)
+        {
+            for(int i = 0; i < 128; ++i) body(i);
+            for(int j = 0; j < 256; j+=2) body(j);
+            for(int k = 0; k < 512; ++k) body(k);
+        }
+        for(int c = 42; auto &&v: arr) body(c,v);
+        for(int cc = 37; auto &&vv: arr) body(cc, vv);
+    }
+}
+
 
 #endif
 // CHECK1-LABEL: define dso_local void @body(
@@ -88,7 +105,6 @@ extern "C" void foo4() {
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTNEW_STEP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
@@ -97,7 +113,6 @@ extern "C" void foo4() {
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -129,107 +144,103 @@ extern "C" void foo4() {
 // CHECK1-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
 // CHECK1-NEXT:    [[SUB4:%.*]] = sub i32 [[DIV]], 1
 // CHECK1-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT:    [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT:    [[ADD5:%.*]] = add i32 [[TMP8]], 1
 // CHECK1-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP9]], ptr [[J]], align 4
 // CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
-// CHECK1-NEXT:    store i32 [[TMP10]], ptr [[J]], align 4
-// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4
-// CHECK1-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4
-// CHECK1-NEXT:    store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
-// CHECK1-NEXT:    store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT:    store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
 // CHECK1-NEXT:    [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
-// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK1-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
 // CHECK1-NEXT:    [[SUB14:%.*]] = sub i32 [[DIV13]], 1
 // CHECK1-NEXT:    store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT:    store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT:    [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT:    [[ADD15:%.*]] = add i32 [[TMP17]], 1
 // CHECK1-NEXT:    store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT:    store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]]
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]]
 // CHECK1-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
 // CHECK1:       [[COND_TRUE]]:
-// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
 // CHECK1-NEXT:    br label %[[COND_END:.*]]
 // CHECK1:       [[COND_FALSE]]:
-// CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
 // CHECK1-NEXT:    br label %[[COND_END]]
 // CHECK1:       [[COND_END]]:
-// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ]
+// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ]
 // CHECK1-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
 // CHECK1-NEXT:    br label %[[FOR_COND:.*]]
 // CHECK1:       [[FOR_COND]]:
-// CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
-// CHECK1-NEXT:    [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]]
 // CHECK1-NEXT:    br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
 // CHECK1:       [[FOR_BODY]]:
-// CHECK1-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT:    [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
 // CHECK1-NEXT:    br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
 // CHECK1:       [[IF_THEN]]:
-// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]]
-// CHECK1-NEXT:    [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]]
+// CHECK1-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]]
+// CHECK1-NEXT:    [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]]
 // CHECK1-NEXT:    store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
-// CHECK1-NEXT:    [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT:    [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]]
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT:    [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]]
 // CHECK1-NEXT:    store i32 [[ADD20]], ptr [[I]], align 4
-// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP35]])
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP33]])
 // CHECK1-NEXT:    br label %[[IF_END]]
 // CHECK1:       [[IF_END]]:
-// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT:    [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]]
+// CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]]
 // CHECK1-NEXT:    br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
 // CHECK1:       [[IF_THEN22]]:
-// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]]
-// CHECK1-NEXT:    [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]]
+// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]]
+// CHECK1-NEXT:    [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]]
 // CHECK1-NEXT:    store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT:    [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]]
-// CHECK1-NEXT:    [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]]
+// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]]
+// CHECK1-NEXT:    [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]]
 // CHECK1-NEXT:    store i32 [[ADD26]], ptr [[J]], align 4
-// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP44]])
+// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP42]])
 // CHECK1-NEXT:    br label %[[IF_END27]]
 // CHECK1:       [[IF_END27]]:
 // CHECK1-NEXT:    br label %[[FOR_INC:.*]]
 // CHECK1:       [[FOR_INC]]:
-// CHECK1-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[INC:%.*]] = add i32 [[TMP45]], 1
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add i32 [[TMP43]], 1
 // CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
 // CHECK1-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
 // CHECK1:       [[FOR_END]]:
@@ -256,7 +267,6 @@ extern "C" void foo4() {
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTNEW_STEP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
@@ -265,7 +275,6 @@ extern "C" void foo4() {
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -274,7 +283,6 @@ extern "C" void foo4() {
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTOMP_UB2:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_LB2:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_ST2:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_NI2:%.*]] = alloca i32, align 4
@@ -304,172 +312,166 @@ extern "C" void foo4() {
 // CHECK1-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
 // CHECK1-NEXT:    [[SUB4:%.*]] = sub i32 [[DIV]], 1
 // CHECK1-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT:    [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT:    [[ADD5:%.*]] = add i32 [[TMP8]], 1
 // CHECK1-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP9]], ptr [[J]], align 4
 // CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK1-NEXT:    store i32 [[TMP10]], ptr [[J]], align 4
-// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK1-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4
-// CHECK1-NEXT:    store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT:    store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK1-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT:    store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
 // CHECK1-NEXT:    [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
-// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK1-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
 // CHECK1-NEXT:    [[SUB14:%.*]] = sub i32 [[DIV13]], 1
 // CHECK1-NEXT:    store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT:    store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT:    [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT:    [[ADD15:%.*]] = add i32 [[TMP17]], 1
 // CHECK1-NEXT:    store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK1-NEXT:    store i32 [[ADD16]], ptr [[K]], align 4
 // CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK1-NEXT:    store i32 [[ADD16]], ptr [[K]], align 4
-// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4
-// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT:    [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK1-NEXT:    [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
 // CHECK1-NEXT:    store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT:    [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
 // CHECK1-NEXT:    store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
-// CHECK1-NEXT:    [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT:    store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4
-// CHECK1-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
-// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK1-NEXT:    [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT:    [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK1-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT:    [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]]
 // CHECK1-NEXT:    [[SUB24:%.*]] = sub i32 [[SUB23]], 1
-// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK1-NEXT:    [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]]
-// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK1-NEXT:    [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]]
+// CHECK1-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT:    [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]]
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT:    [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]]
 // CHECK1-NEXT:    [[SUB27:%.*]] = sub i32 [[DIV26]], 1
 // CHECK1-NEXT:    store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK1-NEXT:    store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB2]], align 4
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST2]], align 4
-// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK1-NEXT:    [[ADD28:%.*]] = add i32 [[TMP32]], 1
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT:    [[ADD28:%.*]] = add i32 [[TMP29]], 1
 // CHECK1-NEXT:    store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
-// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT:    store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]]
 // CHECK1-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
 // CHECK1:       [[COND_TRUE]]:
-// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
 // CHECK1-NEXT:    br label %[[COND_END:.*]]
 // CHECK1:       [[COND_FALSE]]:
-// CHECK1-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
 // CHECK1-NEXT:    br label %[[COND_END]]
 // CHECK1:       [[COND_END]]:
-// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ]
+// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ]
 // CHECK1-NEXT:    store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
-// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
-// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
-// CHECK1-NEXT:    [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]]
+// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT:    [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]]
 // CHECK1-NEXT:    br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
 // CHECK1:       [[COND_TRUE30]]:
-// CHECK1-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
 // CHECK1-NEXT:    br label %[[COND_END32:.*]]
 // CHECK1:       [[COND_FALSE31]]:
-// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
 // CHECK1-NEXT:    br label %[[COND_END32]]
 // CHECK1:       [[COND_END32]]:
-// CHECK1-NEXT:    [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ]
+// CHECK1-NEXT:    [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ]
 // CHECK1-NEXT:    store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
 // CHECK1-NEXT:    br label %[[FOR_COND:.*]]
 // CHECK1:       [[FOR_COND]]:
-// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
-// CHECK1-NEXT:    [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]]
 // CHECK1-NEXT:    br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
 // CHECK1:       [[FOR_BODY]]:
-// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]]
+// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]]
 // CHECK1-NEXT:    br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
 // CHECK1:       [[IF_THEN]]:
-// CHECK1-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT:    [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]]
-// CHECK1-NEXT:    [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]]
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]]
+// CHECK1-NEXT:    [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]]
 // CHECK1-NEXT:    store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
-// CHECK1-NEXT:    [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]]
-// CHECK1-NEXT:    [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]]
+// CHECK1-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT:    [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK1-NEXT:    [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]]
 // CHECK1-NEXT:    store i32 [[ADD38]], ptr [[I]], align 4
-// CHECK1-NEXT:    [[TMP52:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP52]])
+// CHECK1-NEXT:    [[TMP49:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP49]])
 // CHECK1-NEXT:    br label %[[IF_END]]
 // CHECK1:       [[IF_END]]:
-// CHECK1-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT:    [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]]
 // CHECK1-NEXT:    br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
 // CHECK1:       [[IF_THEN40]]:
-// CHECK1-NEXT:    [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK1-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]]
-// CHECK1-NEXT:    [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]]
+// CHECK1-NEXT:    [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT:    [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]]
 // CHECK1-NEXT:    store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT:    [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT:    [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT:    [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]]
-// CHECK1-NEXT:    [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]]
+// CHECK1-NEXT:    [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT:    [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT:    [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]]
 // CHECK1-NEXT:    store i32 [[SUB44]], ptr [[J]], align 4
-// CHECK1-NEXT:    [[TMP61:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP61]])
+// CHECK1-NEXT:    [[TMP58:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP58]])
 // CHECK1-NEXT:    br label %[[IF_END45]]
 // CHECK1:       [[IF_END45]]:
-// CHECK1-NEXT:    [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
-// CHECK1-NEXT:    [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]]
+// CHECK1-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT:    [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]]
 // CHECK1-NEXT:    br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
 // CHECK1:       [[IF_THEN47]]:
-// CHECK1-NEXT:    [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
-// CHECK1-NEXT:    [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
-// CHECK1-NEXT:    [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]]
-// CHECK1-NEXT:    [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]]
+// CHECK1-NEXT:    [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK1-NEXT:    [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK1-NEXT:    [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]]
+// CHECK1-NEXT:    [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]]
 // CHECK1-NEXT:    store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
-// CHECK1-NEXT:    [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK1-NEXT:    [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
-// CHECK1-NEXT:    [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK1-NEXT:    [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]]
-// CHECK1-NEXT:    [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]]
+// CHECK1-NEXT:    [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT:    [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK1-NEXT:    [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT:    [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK1-NEXT:    [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]]
 // CHECK1-NEXT:    store i32 [[ADD51]], ptr [[K]], align 4
-// CHECK1-NEXT:    [[TMP70:%.*]] = load i32, ptr [[K]], align 4
-// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP70]])
+// CHECK1-NEXT:    [[TMP67:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP67]])
 // CHECK1-NEXT:    br label %[[IF_END52]]
 // CHECK1:       [[IF_END52]]:
 // CHECK1-NEXT:    br label %[[FOR_INC:.*]]
 // CHECK1:       [[FOR_INC]]:
-// CHECK1-NEXT:    [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[INC:%.*]] = add i32 [[TMP71]], 1
+// CHECK1-NEXT:    [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add i32 [[TMP68]], 1
 // CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
 // CHECK1-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
 // CHECK1:       [[FOR_END]]:
@@ -481,13 +483,11 @@ extern "C" void foo4() {
 // CHECK1-NEXT:  [[ENTRY:.*:]]
 // CHECK1-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -497,48 +497,43 @@ extern "C" void foo4() {
 // CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTOMP_UB03:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTOMP_LB04:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTOMP_ST05:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTOMP_NI06:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[DOTOMP_IV07:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV06:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[C:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[__RANGE2:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[__BEGIN2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[DOTOMP_UB117:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[DOTOMP_LB118:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[DOTOMP_ST119:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[DOTOMP_NI120:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[DOTOMP_IV122:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV120:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[CC:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[__RANGE223:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT:    [[__END224:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT:    [[__BEGIN227:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__RANGE221:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__END222:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__BEGIN225:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[DOTOMP_UB2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOMP_LB2:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOMP_ST2:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOMP_NI2:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOMP_IV2:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[V:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[VV:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT:    store i32 127, ptr [[DOTOMP_UB0]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
 // CHECK1-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[J]], align 4
-// CHECK1-NEXT:    store i32 127, ptr [[DOTOMP_UB1]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
 // CHECK1-NEXT:    store i32 128, ptr [[DOTOMP_NI1]], align 4
@@ -565,225 +560,219 @@ extern "C" void foo4() {
 // CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK1-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK1-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST04]], align 4
 // CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT:    store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4
-// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB04]], align 4
-// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST05]], align 4
-// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
 // CHECK1-NEXT:    [[CONV:%.*]] = sext i32 [[ADD]] to i64
-// CHECK1-NEXT:    store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8
+// CHECK1-NEXT:    store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
 // CHECK1-NEXT:    store i32 42, ptr [[C]], align 4
 // CHECK1-NEXT:    store ptr [[ARR]], ptr [[__RANGE2]], align 8
-// CHECK1-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK1-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
 // CHECK1-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
 // CHECK1-NEXT:    store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK1-NEXT:    [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
-// CHECK1-NEXT:    store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8
-// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK1-NEXT:    [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0
-// CHECK1-NEXT:    store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK1-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8
-// CHECK1-NEXT:    store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8
-// CHECK1-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8
-// CHECK1-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK1-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
-// CHECK1-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64
+// CHECK1-NEXT:    [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT:    store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
 // CHECK1-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
 // CHECK1-NEXT:    [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
-// CHECK1-NEXT:    [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
-// CHECK1-NEXT:    [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1
-// CHECK1-NEXT:    [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1
-// CHECK1-NEXT:    [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1
-// CHECK1-NEXT:    store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK1-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK1-NEXT:    store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8
-// CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_LB118]], align 8
-// CHECK1-NEXT:    store i64 1, ptr [[DOTOMP_ST119]], align 8
-// CHECK1-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK1-NEXT:    [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1
-// CHECK1-NEXT:    store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT:    [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT:    [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK1-NEXT:    [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK1-NEXT:    [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK1-NEXT:    store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT:    store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT:    [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK1-NEXT:    store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
 // CHECK1-NEXT:    store i32 37, ptr [[CC]], align 4
-// CHECK1-NEXT:    store ptr [[ARR]], ptr [[__RANGE223]], align 8
-// CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK1-NEXT:    [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
-// CHECK1-NEXT:    [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256
-// CHECK1-NEXT:    store ptr [[ADD_PTR26]], ptr [[__END224]], align 8
-// CHECK1-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK1-NEXT:    [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0
-// CHECK1-NEXT:    store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8
-// CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK1-NEXT:    [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0
-// CHECK1-NEXT:    store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8
-// CHECK1-NEXT:    store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8
-// CHECK1-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8
-// CHECK1-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK1-NEXT:    [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64
-// CHECK1-NEXT:    [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64
-// CHECK1-NEXT:    [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]]
-// CHECK1-NEXT:    [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8
-// CHECK1-NEXT:    [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1
-// CHECK1-NEXT:    [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1
-// CHECK1-NEXT:    [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1
-// CHECK1-NEXT:    [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1
-// CHECK1-NEXT:    store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK1-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK1-NEXT:    store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8
+// CHECK1-NEXT:    store ptr [[ARR]], ptr [[__RANGE221]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0
+// CHECK1-NEXT:    [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256
+// CHECK1-NEXT:    store ptr [[ADD_PTR24]], ptr [[__END222]], align 8
+// CHECK1-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8
+// CHECK1-NEXT:    store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT:    [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]]
+// CHECK1-NEXT:    [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8
+// CHECK1-NEXT:    [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1
+// CHECK1-NEXT:    [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1
+// CHECK1-NEXT:    [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1
+// CHECK1-NEXT:    [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1
+// CHECK1-NEXT:    store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8
 // CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_LB2]], align 8
 // CHECK1-NEXT:    store i64 1, ptr [[DOTOMP_ST2]], align 8
-// CHECK1-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK1-NEXT:    [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1
-// CHECK1-NEXT:    store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8
-// CHECK1-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
-// CHECK1-NEXT:    store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK1-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK1-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK1-NEXT:    [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]]
-// CHECK1-NEXT:    br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]]
-// CHECK1:       [[COND_TRUE44]]:
-// CHECK1-NEXT:    [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK1-NEXT:    br label %[[COND_END46:.*]]
-// CHECK1:       [[COND_FALSE45]]:
-// CHECK1-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK1-NEXT:    br label %[[COND_END46]]
-// CHECK1:       [[COND_END46]]:
-// CHECK1-NEXT:    [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ]
-// CHECK1-NEXT:    store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK1-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK1-NEXT:    [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK1-NEXT:    [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]]
-// CHECK1-NEXT:    br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]]
-// CHECK1:       [[COND_TRUE50]]:
-// CHECK1-NEXT:    [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK1-NEXT:    br label %[[COND_END52:.*]]
-// CHECK1:       [[COND_FALSE51]]:
-// CHECK1-NEXT:    [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK1-NEXT:    br label %[[COND_END52]]
-// CHECK1:       [[COND_END52]]:
-// CHECK1-NEXT:    [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ]
-// CHECK1-NEXT:    store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8
-// CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK1-NEXT:    [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1
+// CHECK1-NEXT:    store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT:    store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]]
+// CHECK1-NEXT:    br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]]
+// CHECK1:       [[COND_TRUE42]]:
+// CHECK1-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT:    br label %[[COND_END44:.*]]
+// CHECK1:       [[COND_FALSE43]]:
+// CHECK1-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    br label %[[COND_END44]]
+// CHECK1:       [[COND_END44]]:
+// CHECK1-NEXT:    [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ]
+// CHECK1-NEXT:    store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT:    [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT:    br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]]
+// CHECK1:       [[COND_TRUE48]]:
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT:    br label %[[COND_END50:.*]]
+// CHECK1:       [[COND_FALSE49]]:
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT:    br label %[[COND_END50]]
+// CHECK1:       [[COND_END50]]:
+// CHECK1-NEXT:    [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ]
+// CHECK1-NEXT:    store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8
 // CHECK1-NEXT:    br label %[[FOR_COND:.*]]
 // CHECK1:       [[FOR_COND]]:
-// CHECK1-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT:    [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8
-// CHECK1-NEXT:    [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]]
-// CHECK1-NEXT:    br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK1-NEXT:    [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT:    br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
 // CHECK1:       [[FOR_BODY]]:
-// CHECK1-NEXT:    [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT:    [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
-// CHECK1-NEXT:    [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]]
-// CHECK1-NEXT:    br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]]
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT:    [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]]
+// CHECK1-NEXT:    br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]]
 // CHECK1:       [[IF_THEN]]:
-// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4
-// CHECK1-NEXT:    [[CONV57:%.*]] = sext i32 [[TMP38]] to i64
-// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4
-// CHECK1-NEXT:    [[CONV58:%.*]] = sext i32 [[TMP39]] to i64
-// CHECK1-NEXT:    [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]]
-// CHECK1-NEXT:    [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]]
-// CHECK1-NEXT:    [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32
-// CHECK1-NEXT:    store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4
-// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4
-// CHECK1-NEXT:    [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1
-// CHECK1-NEXT:    [[ADD62:%.*]] = add nsw i32 0, [[MUL61]]
-// CHECK1-NEXT:    store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT:    [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]]
-// CHECK1-NEXT:    br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]]
-// CHECK1:       [[IF_THEN64]]:
-// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK1-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]]
-// CHECK1-NEXT:    [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]]
-// CHECK1-NEXT:    store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT:    [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1
-// CHECK1-NEXT:    [[ADD68:%.*]] = add nsw i32 0, [[MUL67]]
-// CHECK1-NEXT:    store i32 [[ADD68]], ptr [[I]], align 4
-// CHECK1-NEXT:    [[TMP48:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP48]])
+// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT:    [[CONV55:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT:    [[CONV56:%.*]] = sext i32 [[TMP36]] to i64
+// CHECK1-NEXT:    [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]]
+// CHECK1-NEXT:    [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]]
+// CHECK1-NEXT:    [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32
+// CHECK1-NEXT:    store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT:    [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1
+// CHECK1-NEXT:    [[ADD60:%.*]] = add nsw i32 0, [[MUL59]]
+// CHECK1-NEXT:    store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT:    br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]]
+// CHECK1:       [[IF_THEN62]]:
+// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT:    [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]]
+// CHECK1-NEXT:    store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1
+// CHECK1-NEXT:    [[ADD66:%.*]] = add nsw i32 0, [[MUL65]]
+// CHECK1-NEXT:    store i32 [[ADD66]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP45]])
 // CHECK1-NEXT:    br label %[[IF_END]]
 // CHECK1:       [[IF_END]]:
-// CHECK1-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT:    [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]]
-// CHECK1-NEXT:    br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]]
-// CHECK1:       [[IF_THEN70]]:
-// CHECK1-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK1-NEXT:    [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT:    [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]]
-// CHECK1-NEXT:    [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]]
-// CHECK1-NEXT:    store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT:    [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2
-// CHECK1-NEXT:    [[ADD74:%.*]] = add nsw i32 0, [[MUL73]]
-// CHECK1-NEXT:    store i32 [[ADD74]], ptr [[J]], align 4
-// CHECK1-NEXT:    [[TMP55:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP55]])
-// CHECK1-NEXT:    br label %[[IF_END75]]
-// CHECK1:       [[IF_END75]]:
-// CHECK1-NEXT:    br label %[[IF_END76]]
-// CHECK1:       [[IF_END76]]:
-// CHECK1-NEXT:    [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT:    [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK1-NEXT:    [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]]
-// CHECK1-NEXT:    br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]]
-// CHECK1:       [[IF_THEN78]]:
-// CHECK1-NEXT:    [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8
-// CHECK1-NEXT:    [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8
-// CHECK1-NEXT:    [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT:    [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]]
-// CHECK1-NEXT:    [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]]
-// CHECK1-NEXT:    store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8
-// CHECK1-NEXT:    [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK1-NEXT:    [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8
-// CHECK1-NEXT:    [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1
-// CHECK1-NEXT:    [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]]
-// CHECK1-NEXT:    store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8
-// CHECK1-NEXT:    [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
-// CHECK1-NEXT:    store ptr [[TMP63]], ptr [[V]], align 8
-// CHECK1-NEXT:    [[TMP64:%.*]] = load i32, ptr [[C]], align 4
-// CHECK1-NEXT:    [[TMP65:%.*]] = load ptr, ptr [[V]], align 8
-// CHECK1-NEXT:    [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8
-// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP64]], double noundef [[TMP66]])
-// CHECK1-NEXT:    br label %[[IF_END83]]
-// CHECK1:       [[IF_END83]]:
-// CHECK1-NEXT:    [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT:    [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK1-NEXT:    [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]]
-// CHECK1-NEXT:    br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]]
-// CHECK1:       [[IF_THEN85]]:
-// CHECK1-NEXT:    [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
-// CHECK1-NEXT:    [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
-// CHECK1-NEXT:    [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT:    [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]]
-// CHECK1-NEXT:    [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]]
-// CHECK1-NEXT:    store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8
-// CHECK1-NEXT:    [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK1-NEXT:    [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
-// CHECK1-NEXT:    [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1
-// CHECK1-NEXT:    [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]]
-// CHECK1-NEXT:    store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8
-// CHECK1-NEXT:    [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8
-// CHECK1-NEXT:    store ptr [[TMP74]], ptr [[VV]], align 8
-// CHECK1-NEXT:    [[TMP75:%.*]] = load i32, ptr [[CC]], align 4
-// CHECK1-NEXT:    [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8
-// CHECK1-NEXT:    [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8
-// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP75]], double noundef [[TMP77]])
-// CHECK1-NEXT:    br label %[[IF_END90]]
-// CHECK1:       [[IF_END90]]:
+// CHECK1-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
+// CHECK1-NEXT:    br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]]
+// CHECK1:       [[IF_THEN68]]:
+// CHECK1-NEXT:    [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]]
+// CHECK1-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]]
+// CHECK1-NEXT:    store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2
+// CHECK1-NEXT:    [[ADD72:%.*]] = add nsw i32 0, [[MUL71]]
+// CHECK1-NEXT:    store i32 [[ADD72]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP52:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP52]])
+// CHECK1-NEXT:    br label %[[IF_END73]]
+// CHECK1:       [[IF_END73]]:
+// CHECK1-NEXT:    br label %[[IF_END74]]
+// CHECK1:       [[IF_END74]]:
+// CHECK1-NEXT:    [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT:    br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]]
+// CHECK1:       [[IF_THEN76]]:
+// CHECK1-NEXT:    [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT:    [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT:    [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT:    [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]]
+// CHECK1-NEXT:    store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT:    [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT:    [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT:    [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1
+// CHECK1-NEXT:    [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]]
+// CHECK1-NEXT:    store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    store ptr [[TMP60]], ptr [[V]], align 8
+// CHECK1-NEXT:    [[TMP61:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT:    [[TMP62:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT:    [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]])
+// CHECK1-NEXT:    br label %[[IF_END81]]
+// CHECK1:       [[IF_END81]]:
+// CHECK1-NEXT:    [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT:    [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]]
+// CHECK1-NEXT:    br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]]
+// CHECK1:       [[IF_THEN83]]:
+// CHECK1-NEXT:    [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK1-NEXT:    [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK1-NEXT:    [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]]
+// CHECK1-NEXT:    [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]]
+// CHECK1-NEXT:    store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT:    [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT:    [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT:    [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1
+// CHECK1-NEXT:    [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]]
+// CHECK1-NEXT:    store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT:    [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT:    store ptr [[TMP71]], ptr [[VV]], align 8
+// CHECK1-NEXT:    [[TMP72:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT:    [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT:    [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]])
+// CHECK1-NEXT:    br label %[[IF_END88]]
+// CHECK1:       [[IF_END88]]:
 // CHECK1-NEXT:    br label %[[FOR_INC:.*]]
 // CHECK1:       [[FOR_INC]]:
-// CHECK1-NEXT:    [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT:    [[INC:%.*]] = add nsw i64 [[TMP78]], 1
-// CHECK1-NEXT:    store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT:    [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i64 [[TMP75]], 1
+// CHECK1-NEXT:    store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8
 // CHECK1-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
 // CHECK1:       [[FOR_END]]:
 // CHECK1-NEXT:    ret void
@@ -794,13 +783,11 @@ extern "C" void foo4() {
 // CHECK1-NEXT:  [[ENTRY:.*:]]
 // CHECK1-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
 // CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[K:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -815,12 +802,10 @@ extern "C" void foo4() {
 // CHECK1-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[V:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    store i32 0, ptr [[J]], align 4
-// CHECK1-NEXT:    store i32 127, ptr [[DOTOMP_UB0]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
 // CHECK1-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[K]], align 4
-// CHECK1-NEXT:    store i32 63, ptr [[DOTOMP_UB1]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
 // CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
 // CHECK1-NEXT:    store i32 64, ptr [[DOTOMP_NI1]], align 4
@@ -940,6 +925,277 @@ extern "C" void foo4() {
 // CHECK1-NEXT:    ret void
 //
 //
+// CHECK1-LABEL: define dso_local void @foo5(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT:  [[ENTRY:.*:]]
+// CHECK1-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[__RANGE264:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__BEGIN265:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__END267:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[VV:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[K]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    store i32 512, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1:       [[COND_TRUE]]:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END:.*]]
+// CHECK1:       [[COND_FALSE]]:
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    br label %[[COND_END]]
+// CHECK1:       [[COND_END]]:
+// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT:    store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT:    store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT:    [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK1-NEXT:    store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT:    store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT:    store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK1-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK1-NEXT:    store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT:    store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK1-NEXT:    [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK1-NEXT:    [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT:    [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK1-NEXT:    [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK1-NEXT:    [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK1-NEXT:    store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT:    store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT:    [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK1-NEXT:    store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT:    store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
+// CHECK1-NEXT:    br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]]
+// CHECK1:       [[COND_TRUE24]]:
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT:    br label %[[COND_END26:.*]]
+// CHECK1:       [[COND_FALSE25]]:
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    br label %[[COND_END26]]
+// CHECK1:       [[COND_END26]]:
+// CHECK1-NEXT:    [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ]
+// CHECK1-NEXT:    store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK1:       [[FOR_COND]]:
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128
+// CHECK1-NEXT:    br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1:       [[FOR_BODY]]:
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP21]])
+// CHECK1-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK1:       [[FOR_INC]]:
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP22]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[I]], align 4
+// CHECK1-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK1:       [[FOR_END]]:
+// CHECK1-NEXT:    store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND30:.*]]
+// CHECK1:       [[FOR_COND30]]:
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK1-NEXT:    [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]]
+// CHECK1-NEXT:    br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]]
+// CHECK1:       [[FOR_BODY32]]:
+// CHECK1-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT:    [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT:    br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]]
+// CHECK1:       [[IF_THEN]]:
+// CHECK1-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT:    [[CONV34:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT:    [[CONV35:%.*]] = sext i32 [[TMP28]] to i64
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]]
+// CHECK1-NEXT:    [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]]
+// CHECK1-NEXT:    [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
+// CHECK1-NEXT:    store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT:    [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1
+// CHECK1-NEXT:    [[ADD39:%.*]] = add nsw i32 0, [[MUL38]]
+// CHECK1-NEXT:    store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT:    [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT:    br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]]
+// CHECK1:       [[IF_THEN41]]:
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]]
+// CHECK1-NEXT:    store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT:    [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2
+// CHECK1-NEXT:    [[ADD45:%.*]] = add nsw i32 0, [[MUL44]]
+// CHECK1-NEXT:    store i32 [[ADD45]], ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP37:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP37]])
+// CHECK1-NEXT:    br label %[[IF_END]]
+// CHECK1:       [[IF_END]]:
+// CHECK1-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT:    [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]]
+// CHECK1-NEXT:    br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK1:       [[IF_THEN47]]:
+// CHECK1-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT:    [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]]
+// CHECK1-NEXT:    [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]]
+// CHECK1-NEXT:    store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT:    [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1
+// CHECK1-NEXT:    [[ADD51:%.*]] = add nsw i32 0, [[MUL50]]
+// CHECK1-NEXT:    store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP44]])
+// CHECK1-NEXT:    br label %[[IF_END52]]
+// CHECK1:       [[IF_END52]]:
+// CHECK1-NEXT:    br label %[[IF_END53]]
+// CHECK1:       [[IF_END53]]:
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT:    [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]]
+// CHECK1-NEXT:    br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]]
+// CHECK1:       [[IF_THEN55]]:
+// CHECK1-NEXT:    [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT:    [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT:    [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]]
+// CHECK1-NEXT:    [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]]
+// CHECK1-NEXT:    store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT:    [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT:    [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT:    [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1
+// CHECK1-NEXT:    [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]]
+// CHECK1-NEXT:    store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT:    store ptr [[TMP52]], ptr [[V]], align 8
+// CHECK1-NEXT:    [[TMP53:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT:    [[TMP54:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT:    [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]])
+// CHECK1-NEXT:    br label %[[IF_END60]]
+// CHECK1:       [[IF_END60]]:
+// CHECK1-NEXT:    br label %[[FOR_INC61:.*]]
+// CHECK1:       [[FOR_INC61]]:
+// CHECK1-NEXT:    [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    [[INC62:%.*]] = add nsw i64 [[TMP56]], 1
+// CHECK1-NEXT:    store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND30]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK1:       [[FOR_END63]]:
+// CHECK1-NEXT:    store i32 37, ptr [[CC]], align 4
+// CHECK1-NEXT:    store ptr [[ARR]], ptr [[__RANGE264]], align 8
+// CHECK1-NEXT:    [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT:    [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK1-NEXT:    [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0
+// CHECK1-NEXT:    [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256
+// CHECK1-NEXT:    store ptr [[ADD_PTR69]], ptr [[__END267]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND70:.*]]
+// CHECK1:       [[FOR_COND70]]:
+// CHECK1-NEXT:    [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT:    [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8
+// CHECK1-NEXT:    [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]]
+// CHECK1-NEXT:    br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]]
+// CHECK1:       [[FOR_BODY72]]:
+// CHECK1-NEXT:    [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT:    store ptr [[TMP61]], ptr [[VV]], align 8
+// CHECK1-NEXT:    [[TMP62:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT:    [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT:    [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]])
+// CHECK1-NEXT:    br label %[[FOR_INC73:.*]]
+// CHECK1:       [[FOR_INC73]]:
+// CHECK1-NEXT:    [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1
+// CHECK1-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT:    br label %[[FOR_COND70]]
+// CHECK1:       [[FOR_END74]]:
+// CHECK1-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define dso_local void @body(
 // CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] {
 // CHECK2-NEXT:  [[ENTRY:.*:]]
@@ -961,7 +1217,6 @@ extern "C" void foo4() {
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTNEW_STEP:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
@@ -970,7 +1225,6 @@ extern "C" void foo4() {
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -1002,107 +1256,103 @@ extern "C" void foo4() {
 // CHECK2-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
 // CHECK2-NEXT:    [[SUB4:%.*]] = sub i32 [[DIV]], 1
 // CHECK2-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
 // CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT:    [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT:    [[ADD5:%.*]] = add i32 [[TMP8]], 1
 // CHECK2-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP9]], ptr [[J]], align 4
 // CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
-// CHECK2-NEXT:    store i32 [[TMP10]], ptr [[J]], align 4
-// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4
-// CHECK2-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4
-// CHECK2-NEXT:    store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
-// CHECK2-NEXT:    store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK2-NEXT:    store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
 // CHECK2-NEXT:    [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
 // CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
-// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK2-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
 // CHECK2-NEXT:    [[SUB14:%.*]] = sub i32 [[DIV13]], 1
 // CHECK2-NEXT:    store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT:    store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
 // CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT:    [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT:    [[ADD15:%.*]] = add i32 [[TMP17]], 1
 // CHECK2-NEXT:    store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT:    store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]]
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]]
 // CHECK2-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
 // CHECK2:       [[COND_TRUE]]:
-// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
 // CHECK2-NEXT:    br label %[[COND_END:.*]]
 // CHECK2:       [[COND_FALSE]]:
-// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
 // CHECK2-NEXT:    br label %[[COND_END]]
 // CHECK2:       [[COND_END]]:
-// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ]
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ]
 // CHECK2-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
 // CHECK2-NEXT:    br label %[[FOR_COND:.*]]
 // CHECK2:       [[FOR_COND]]:
-// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
-// CHECK2-NEXT:    [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]]
 // CHECK2-NEXT:    br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
 // CHECK2:       [[FOR_BODY]]:
-// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT:    [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
 // CHECK2-NEXT:    br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
 // CHECK2:       [[IF_THEN]]:
-// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]]
-// CHECK2-NEXT:    [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]]
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]]
+// CHECK2-NEXT:    [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]]
 // CHECK2-NEXT:    store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
-// CHECK2-NEXT:    [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]]
-// CHECK2-NEXT:    [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]]
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT:    [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]]
 // CHECK2-NEXT:    store i32 [[ADD20]], ptr [[I]], align 4
-// CHECK2-NEXT:    [[TMP35:%.*]] = load i32, ptr [[I]], align 4
-// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP35]])
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP33]])
 // CHECK2-NEXT:    br label %[[IF_END]]
 // CHECK2:       [[IF_END]]:
-// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT:    [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]]
+// CHECK2-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]]
 // CHECK2-NEXT:    br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
 // CHECK2:       [[IF_THEN22]]:
-// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]]
-// CHECK2-NEXT:    [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]]
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]]
+// CHECK2-NEXT:    [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]]
 // CHECK2-NEXT:    store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT:    [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]]
-// CHECK2-NEXT:    [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]]
+// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]]
+// CHECK2-NEXT:    [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]]
 // CHECK2-NEXT:    store i32 [[ADD26]], ptr [[J]], align 4
-// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[J]], align 4
-// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP44]])
+// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP42]])
 // CHECK2-NEXT:    br label %[[IF_END27]]
 // CHECK2:       [[IF_END27]]:
 // CHECK2-NEXT:    br label %[[FOR_INC:.*]]
 // CHECK2:       [[FOR_INC]]:
-// CHECK2-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[INC:%.*]] = add i32 [[TMP45]], 1
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add i32 [[TMP43]], 1
 // CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
 // CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
 // CHECK2:       [[FOR_END]]:
@@ -1114,13 +1364,11 @@ extern "C" void foo4() {
 // CHECK2-NEXT:  [[ENTRY:.*:]]
 // CHECK2-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -1130,48 +1378,43 @@ extern "C" void foo4() {
 // CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTOMP_UB03:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTOMP_LB04:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTOMP_ST05:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTOMP_NI06:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[DOTOMP_IV07:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_IV06:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[C:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[__RANGE2:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT:    [[__BEGIN2:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[DOTOMP_UB117:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[DOTOMP_LB118:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[DOTOMP_ST119:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[DOTOMP_NI120:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[DOTOMP_IV122:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_IV120:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[CC:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[__RANGE223:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT:    [[__END224:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT:    [[__BEGIN227:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__RANGE221:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__END222:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__BEGIN225:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[DOTOMP_UB2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[DOTOMP_LB2:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[DOTOMP_ST2:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[DOTOMP_NI2:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[DOTOMP_IV2:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[V:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT:    [[VV:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT:    store i32 127, ptr [[DOTOMP_UB0]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
 // CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
 // CHECK2-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[J]], align 4
-// CHECK2-NEXT:    store i32 127, ptr [[DOTOMP_UB1]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
 // CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
 // CHECK2-NEXT:    store i32 128, ptr [[DOTOMP_NI1]], align 4
@@ -1198,225 +1441,219 @@ extern "C" void foo4() {
 // CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK2-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK2-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST04]], align 4
 // CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT:    store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4
-// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB04]], align 4
-// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST05]], align 4
-// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
 // CHECK2-NEXT:    [[CONV:%.*]] = sext i32 [[ADD]] to i64
-// CHECK2-NEXT:    store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8
+// CHECK2-NEXT:    store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
 // CHECK2-NEXT:    store i32 42, ptr [[C]], align 4
 // CHECK2-NEXT:    store ptr [[ARR]], ptr [[__RANGE2]], align 8
-// CHECK2-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK2-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
 // CHECK2-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
 // CHECK2-NEXT:    store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
 // CHECK2-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK2-NEXT:    [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
-// CHECK2-NEXT:    store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8
-// CHECK2-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK2-NEXT:    [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0
-// CHECK2-NEXT:    store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK2-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8
-// CHECK2-NEXT:    store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8
-// CHECK2-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8
-// CHECK2-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK2-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
-// CHECK2-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64
+// CHECK2-NEXT:    [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT:    store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
 // CHECK2-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
 // CHECK2-NEXT:    [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
-// CHECK2-NEXT:    [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
-// CHECK2-NEXT:    [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1
-// CHECK2-NEXT:    [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1
-// CHECK2-NEXT:    [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1
-// CHECK2-NEXT:    store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK2-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK2-NEXT:    store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8
-// CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_LB118]], align 8
-// CHECK2-NEXT:    store i64 1, ptr [[DOTOMP_ST119]], align 8
-// CHECK2-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK2-NEXT:    [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1
-// CHECK2-NEXT:    store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT:    [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT:    [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK2-NEXT:    [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK2-NEXT:    [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK2-NEXT:    store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT:    store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT:    [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK2-NEXT:    store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
 // CHECK2-NEXT:    store i32 37, ptr [[CC]], align 4
-// CHECK2-NEXT:    store ptr [[ARR]], ptr [[__RANGE223]], align 8
-// CHECK2-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK2-NEXT:    [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
-// CHECK2-NEXT:    [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256
-// CHECK2-NEXT:    store ptr [[ADD_PTR26]], ptr [[__END224]], align 8
-// CHECK2-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK2-NEXT:    [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0
-// CHECK2-NEXT:    store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8
-// CHECK2-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK2-NEXT:    [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0
-// CHECK2-NEXT:    store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK2-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8
-// CHECK2-NEXT:    store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8
-// CHECK2-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8
-// CHECK2-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK2-NEXT:    [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64
-// CHECK2-NEXT:    [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64
-// CHECK2-NEXT:    [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]]
-// CHECK2-NEXT:    [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8
-// CHECK2-NEXT:    [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1
-// CHECK2-NEXT:    [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1
-// CHECK2-NEXT:    [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1
-// CHECK2-NEXT:    [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1
-// CHECK2-NEXT:    store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK2-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK2-NEXT:    store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8
+// CHECK2-NEXT:    store ptr [[ARR]], ptr [[__RANGE221]], align 8
+// CHECK2-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0
+// CHECK2-NEXT:    [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256
+// CHECK2-NEXT:    store ptr [[ADD_PTR24]], ptr [[__END222]], align 8
+// CHECK2-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8
+// CHECK2-NEXT:    store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT:    [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]]
+// CHECK2-NEXT:    [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8
+// CHECK2-NEXT:    [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1
+// CHECK2-NEXT:    [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1
+// CHECK2-NEXT:    [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1
+// CHECK2-NEXT:    [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1
+// CHECK2-NEXT:    store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8
 // CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_LB2]], align 8
 // CHECK2-NEXT:    store i64 1, ptr [[DOTOMP_ST2]], align 8
-// CHECK2-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK2-NEXT:    [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1
-// CHECK2-NEXT:    store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8
-// CHECK2-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
-// CHECK2-NEXT:    store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK2-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK2-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK2-NEXT:    [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]]
-// CHECK2-NEXT:    br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]]
-// CHECK2:       [[COND_TRUE44]]:
-// CHECK2-NEXT:    [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK2-NEXT:    br label %[[COND_END46:.*]]
-// CHECK2:       [[COND_FALSE45]]:
-// CHECK2-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK2-NEXT:    br label %[[COND_END46]]
-// CHECK2:       [[COND_END46]]:
-// CHECK2-NEXT:    [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ]
-// CHECK2-NEXT:    store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK2-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK2-NEXT:    [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK2-NEXT:    [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]]
-// CHECK2-NEXT:    br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]]
-// CHECK2:       [[COND_TRUE50]]:
-// CHECK2-NEXT:    [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK2-NEXT:    br label %[[COND_END52:.*]]
-// CHECK2:       [[COND_FALSE51]]:
-// CHECK2-NEXT:    [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK2-NEXT:    br label %[[COND_END52]]
-// CHECK2:       [[COND_END52]]:
-// CHECK2-NEXT:    [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ]
-// CHECK2-NEXT:    store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8
-// CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK2-NEXT:    [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1
+// CHECK2-NEXT:    store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT:    store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT:    br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]]
+// CHECK2:       [[COND_TRUE42]]:
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT:    br label %[[COND_END44:.*]]
+// CHECK2:       [[COND_FALSE43]]:
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    br label %[[COND_END44]]
+// CHECK2:       [[COND_END44]]:
+// CHECK2-NEXT:    [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ]
+// CHECK2-NEXT:    store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT:    [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT:    br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]]
+// CHECK2:       [[COND_TRUE48]]:
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT:    br label %[[COND_END50:.*]]
+// CHECK2:       [[COND_FALSE49]]:
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT:    br label %[[COND_END50]]
+// CHECK2:       [[COND_END50]]:
+// CHECK2-NEXT:    [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ]
+// CHECK2-NEXT:    store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8
 // CHECK2-NEXT:    br label %[[FOR_COND:.*]]
 // CHECK2:       [[FOR_COND]]:
-// CHECK2-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT:    [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8
-// CHECK2-NEXT:    [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]]
-// CHECK2-NEXT:    br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK2-NEXT:    [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT:    br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
 // CHECK2:       [[FOR_BODY]]:
-// CHECK2-NEXT:    [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT:    [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
-// CHECK2-NEXT:    [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]]
-// CHECK2-NEXT:    br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]]
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT:    [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]]
+// CHECK2-NEXT:    br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]]
 // CHECK2:       [[IF_THEN]]:
-// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4
-// CHECK2-NEXT:    [[CONV57:%.*]] = sext i32 [[TMP38]] to i64
-// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4
-// CHECK2-NEXT:    [[CONV58:%.*]] = sext i32 [[TMP39]] to i64
-// CHECK2-NEXT:    [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]]
-// CHECK2-NEXT:    [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]]
-// CHECK2-NEXT:    [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32
-// CHECK2-NEXT:    store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4
-// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4
-// CHECK2-NEXT:    [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1
-// CHECK2-NEXT:    [[ADD62:%.*]] = add nsw i32 0, [[MUL61]]
-// CHECK2-NEXT:    store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT:    [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]]
-// CHECK2-NEXT:    br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]]
-// CHECK2:       [[IF_THEN64]]:
-// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK2-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]]
-// CHECK2-NEXT:    [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]]
-// CHECK2-NEXT:    store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT:    [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1
-// CHECK2-NEXT:    [[ADD68:%.*]] = add nsw i32 0, [[MUL67]]
-// CHECK2-NEXT:    store i32 [[ADD68]], ptr [[I]], align 4
-// CHECK2-NEXT:    [[TMP48:%.*]] = load i32, ptr [[I]], align 4
-// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP48]])
+// CHECK2-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT:    [[CONV55:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT:    [[CONV56:%.*]] = sext i32 [[TMP36]] to i64
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]]
+// CHECK2-NEXT:    [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]]
+// CHECK2-NEXT:    [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32
+// CHECK2-NEXT:    store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT:    [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1
+// CHECK2-NEXT:    [[ADD60:%.*]] = add nsw i32 0, [[MUL59]]
+// CHECK2-NEXT:    store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]]
+// CHECK2-NEXT:    br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]]
+// CHECK2:       [[IF_THEN62]]:
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT:    [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]]
+// CHECK2-NEXT:    store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1
+// CHECK2-NEXT:    [[ADD66:%.*]] = add nsw i32 0, [[MUL65]]
+// CHECK2-NEXT:    store i32 [[ADD66]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP45]])
 // CHECK2-NEXT:    br label %[[IF_END]]
 // CHECK2:       [[IF_END]]:
-// CHECK2-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT:    [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]]
-// CHECK2-NEXT:    br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]]
-// CHECK2:       [[IF_THEN70]]:
-// CHECK2-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK2-NEXT:    [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]]
-// CHECK2-NEXT:    [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]]
-// CHECK2-NEXT:    store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT:    [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2
-// CHECK2-NEXT:    [[ADD74:%.*]] = add nsw i32 0, [[MUL73]]
-// CHECK2-NEXT:    store i32 [[ADD74]], ptr [[J]], align 4
-// CHECK2-NEXT:    [[TMP55:%.*]] = load i32, ptr [[J]], align 4
-// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP55]])
-// CHECK2-NEXT:    br label %[[IF_END75]]
-// CHECK2:       [[IF_END75]]:
-// CHECK2-NEXT:    br label %[[IF_END76]]
-// CHECK2:       [[IF_END76]]:
-// CHECK2-NEXT:    [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT:    [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK2-NEXT:    [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]]
-// CHECK2-NEXT:    br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]]
-// CHECK2:       [[IF_THEN78]]:
-// CHECK2-NEXT:    [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8
-// CHECK2-NEXT:    [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8
-// CHECK2-NEXT:    [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT:    [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]]
-// CHECK2-NEXT:    [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]]
-// CHECK2-NEXT:    store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8
-// CHECK2-NEXT:    [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK2-NEXT:    [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8
-// CHECK2-NEXT:    [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1
-// CHECK2-NEXT:    [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]]
-// CHECK2-NEXT:    store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8
-// CHECK2-NEXT:    [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
-// CHECK2-NEXT:    store ptr [[TMP63]], ptr [[V]], align 8
-// CHECK2-NEXT:    [[TMP64:%.*]] = load i32, ptr [[C]], align 4
-// CHECK2-NEXT:    [[TMP65:%.*]] = load ptr, ptr [[V]], align 8
-// CHECK2-NEXT:    [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8
-// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP64]], double noundef [[TMP66]])
-// CHECK2-NEXT:    br label %[[IF_END83]]
-// CHECK2:       [[IF_END83]]:
-// CHECK2-NEXT:    [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT:    [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK2-NEXT:    [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]]
-// CHECK2-NEXT:    br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]]
-// CHECK2:       [[IF_THEN85]]:
-// CHECK2-NEXT:    [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
-// CHECK2-NEXT:    [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
-// CHECK2-NEXT:    [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT:    [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]]
-// CHECK2-NEXT:    [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]]
-// CHECK2-NEXT:    store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8
-// CHECK2-NEXT:    [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK2-NEXT:    [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
-// CHECK2-NEXT:    [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1
-// CHECK2-NEXT:    [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]]
-// CHECK2-NEXT:    store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8
-// CHECK2-NEXT:    [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8
-// CHECK2-NEXT:    store ptr [[TMP74]], ptr [[VV]], align 8
-// CHECK2-NEXT:    [[TMP75:%.*]] = load i32, ptr [[CC]], align 4
-// CHECK2-NEXT:    [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8
-// CHECK2-NEXT:    [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8
-// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP75]], double noundef [[TMP77]])
-// CHECK2-NEXT:    br label %[[IF_END90]]
-// CHECK2:       [[IF_END90]]:
+// CHECK2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
+// CHECK2-NEXT:    br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]]
+// CHECK2:       [[IF_THEN68]]:
+// CHECK2-NEXT:    [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]]
+// CHECK2-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]]
+// CHECK2-NEXT:    store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2
+// CHECK2-NEXT:    [[ADD72:%.*]] = add nsw i32 0, [[MUL71]]
+// CHECK2-NEXT:    store i32 [[ADD72]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP52:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP52]])
+// CHECK2-NEXT:    br label %[[IF_END73]]
+// CHECK2:       [[IF_END73]]:
+// CHECK2-NEXT:    br label %[[IF_END74]]
+// CHECK2:       [[IF_END74]]:
+// CHECK2-NEXT:    [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT:    br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]]
+// CHECK2:       [[IF_THEN76]]:
+// CHECK2-NEXT:    [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT:    [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT:    [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT:    [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]]
+// CHECK2-NEXT:    store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT:    [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT:    [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT:    [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1
+// CHECK2-NEXT:    [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]]
+// CHECK2-NEXT:    store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    store ptr [[TMP60]], ptr [[V]], align 8
+// CHECK2-NEXT:    [[TMP61:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT:    [[TMP62:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT:    [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]])
+// CHECK2-NEXT:    br label %[[IF_END81]]
+// CHECK2:       [[IF_END81]]:
+// CHECK2-NEXT:    [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT:    [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]]
+// CHECK2-NEXT:    br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]]
+// CHECK2:       [[IF_THEN83]]:
+// CHECK2-NEXT:    [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT:    [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT:    [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]]
+// CHECK2-NEXT:    [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]]
+// CHECK2-NEXT:    store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT:    [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT:    [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT:    [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1
+// CHECK2-NEXT:    [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]]
+// CHECK2-NEXT:    store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT:    [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT:    store ptr [[TMP71]], ptr [[VV]], align 8
+// CHECK2-NEXT:    [[TMP72:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT:    [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT:    [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]])
+// CHECK2-NEXT:    br label %[[IF_END88]]
+// CHECK2:       [[IF_END88]]:
 // CHECK2-NEXT:    br label %[[FOR_INC:.*]]
 // CHECK2:       [[FOR_INC]]:
-// CHECK2-NEXT:    [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT:    [[INC:%.*]] = add nsw i64 [[TMP78]], 1
-// CHECK2-NEXT:    store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT:    [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i64 [[TMP75]], 1
+// CHECK2-NEXT:    store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8
 // CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
 // CHECK2:       [[FOR_END]]:
 // CHECK2-NEXT:    ret void
@@ -1427,13 +1664,11 @@ extern "C" void foo4() {
 // CHECK2-NEXT:  [[ENTRY:.*:]]
 // CHECK2-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
 // CHECK2-NEXT:    [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[K:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -1448,12 +1683,10 @@ extern "C" void foo4() {
 // CHECK2-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT:    [[V:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT:    store i32 0, ptr [[J]], align 4
-// CHECK2-NEXT:    store i32 127, ptr [[DOTOMP_UB0]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
 // CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
 // CHECK2-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[K]], align 4
-// CHECK2-NEXT:    store i32 63, ptr [[DOTOMP_UB1]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
 // CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
 // CHECK2-NEXT:    store i32 64, ptr [[DOTOMP_NI1]], align 4
@@ -1573,6 +1806,277 @@ extern "C" void foo4() {
 // CHECK2-NEXT:    ret void
 //
 //
+// CHECK2-LABEL: define dso_local void @foo5(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT:  [[ENTRY:.*:]]
+// CHECK2-NEXT:    [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[__RANGE264:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__BEGIN265:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__END267:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[VV:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[K]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    store i32 512, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2:       [[COND_TRUE]]:
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END:.*]]
+// CHECK2:       [[COND_FALSE]]:
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    br label %[[COND_END]]
+// CHECK2:       [[COND_END]]:
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT:    store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT:    [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK2-NEXT:    store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT:    store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT:    store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK2-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK2-NEXT:    store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT:    store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK2-NEXT:    [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK2-NEXT:    [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT:    [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK2-NEXT:    [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK2-NEXT:    [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK2-NEXT:    store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT:    store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT:    [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK2-NEXT:    store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT:    store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
+// CHECK2-NEXT:    br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]]
+// CHECK2:       [[COND_TRUE24]]:
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT:    br label %[[COND_END26:.*]]
+// CHECK2:       [[COND_FALSE25]]:
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    br label %[[COND_END26]]
+// CHECK2:       [[COND_END26]]:
+// CHECK2-NEXT:    [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ]
+// CHECK2-NEXT:    store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK2:       [[FOR_COND]]:
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128
+// CHECK2-NEXT:    br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2:       [[FOR_BODY]]:
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP21]])
+// CHECK2-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK2:       [[FOR_INC]]:
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP22]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[I]], align 4
+// CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK2:       [[FOR_END]]:
+// CHECK2-NEXT:    store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND30:.*]]
+// CHECK2:       [[FOR_COND30]]:
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK2-NEXT:    [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT:    br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]]
+// CHECK2:       [[FOR_BODY32]]:
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT:    [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT:    br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]]
+// CHECK2:       [[IF_THEN]]:
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT:    [[CONV34:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT:    [[CONV35:%.*]] = sext i32 [[TMP28]] to i64
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]]
+// CHECK2-NEXT:    [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]]
+// CHECK2-NEXT:    [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
+// CHECK2-NEXT:    store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT:    [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1
+// CHECK2-NEXT:    [[ADD39:%.*]] = add nsw i32 0, [[MUL38]]
+// CHECK2-NEXT:    store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT:    br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]]
+// CHECK2:       [[IF_THEN41]]:
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]]
+// CHECK2-NEXT:    store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2
+// CHECK2-NEXT:    [[ADD45:%.*]] = add nsw i32 0, [[MUL44]]
+// CHECK2-NEXT:    store i32 [[ADD45]], ptr [[J]], align 4
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP37]])
+// CHECK2-NEXT:    br label %[[IF_END]]
+// CHECK2:       [[IF_END]]:
+// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]]
+// CHECK2-NEXT:    br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK2:       [[IF_THEN47]]:
+// CHECK2-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]]
+// CHECK2-NEXT:    [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]]
+// CHECK2-NEXT:    store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1
+// CHECK2-NEXT:    [[ADD51:%.*]] = add nsw i32 0, [[MUL50]]
+// CHECK2-NEXT:    store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP44]])
+// CHECK2-NEXT:    br label %[[IF_END52]]
+// CHECK2:       [[IF_END52]]:
+// CHECK2-NEXT:    br label %[[IF_END53]]
+// CHECK2:       [[IF_END53]]:
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT:    [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]]
+// CHECK2-NEXT:    br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]]
+// CHECK2:       [[IF_THEN55]]:
+// CHECK2-NEXT:    [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT:    [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT:    [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]]
+// CHECK2-NEXT:    [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]]
+// CHECK2-NEXT:    store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT:    [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT:    [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT:    [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1
+// CHECK2-NEXT:    [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]]
+// CHECK2-NEXT:    store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT:    store ptr [[TMP52]], ptr [[V]], align 8
+// CHECK2-NEXT:    [[TMP53:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT:    [[TMP54:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT:    [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]])
+// CHECK2-NEXT:    br label %[[IF_END60]]
+// CHECK2:       [[IF_END60]]:
+// CHECK2-NEXT:    br label %[[FOR_INC61:.*]]
+// CHECK2:       [[FOR_INC61]]:
+// CHECK2-NEXT:    [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    [[INC62:%.*]] = add nsw i64 [[TMP56]], 1
+// CHECK2-NEXT:    store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND30]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK2:       [[FOR_END63]]:
+// CHECK2-NEXT:    store i32 37, ptr [[CC]], align 4
+// CHECK2-NEXT:    store ptr [[ARR]], ptr [[__RANGE264]], align 8
+// CHECK2-NEXT:    [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT:    [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK2-NEXT:    [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0
+// CHECK2-NEXT:    [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256
+// CHECK2-NEXT:    store ptr [[ADD_PTR69]], ptr [[__END267]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND70:.*]]
+// CHECK2:       [[FOR_COND70]]:
+// CHECK2-NEXT:    [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT:    [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8
+// CHECK2-NEXT:    [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]]
+// CHECK2-NEXT:    br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]]
+// CHECK2:       [[FOR_BODY72]]:
+// CHECK2-NEXT:    [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT:    store ptr [[TMP61]], ptr [[VV]], align 8
+// CHECK2-NEXT:    [[TMP62:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT:    [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT:    [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]])
+// CHECK2-NEXT:    br label %[[FOR_INC73:.*]]
+// CHECK2:       [[FOR_INC73]]:
+// CHECK2-NEXT:    [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1
+// CHECK2-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT:    br label %[[FOR_COND70]]
+// CHECK2:       [[FOR_END74]]:
+// CHECK2-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define dso_local void @tfoo2(
 // CHECK2-SAME: ) #[[ATTR0]] {
 // CHECK2-NEXT:  [[ENTRY:.*:]]
@@ -1593,7 +2097,6 @@ extern "C" void foo4() {
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTNEW_STEP:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTOMP_UB0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_LB0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_ST0:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_NI0:%.*]] = alloca i32, align 4
@@ -1602,7 +2105,6 @@ extern "C" void foo4() {
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTOMP_UB1:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_LB1:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_ST1:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -1611,7 +2113,6 @@ extern "C" void foo4() {
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTOMP_UB2:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_LB2:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_ST2:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_NI2:%.*]] = alloca i32, align 4
@@ -1641,174 +2142,168 @@ extern "C" void foo4() {
 // CHECK2-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
 // CHECK2-NEXT:    [[SUB4:%.*]] = sub i32 [[DIV]], 1
 // CHECK2-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB0]], align 4
 // CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT:    [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT:    [[ADD5:%.*]] = add i32 [[TMP8]], 1
 // CHECK2-NEXT:    store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP9]], ptr [[J]], align 4
 // CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK2-NEXT:    store i32 [[TMP10]], ptr [[J]], align 4
-// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK2-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4
-// CHECK2-NEXT:    store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT:    store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK2-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK2-NEXT:    store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT:    [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
 // CHECK2-NEXT:    [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
 // CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT:    [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
-// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK2-NEXT:    [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
 // CHECK2-NEXT:    [[SUB14:%.*]] = sub i32 [[DIV13]], 1
 // CHECK2-NEXT:    store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT:    store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB1]], align 4
 // CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT:    [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT:    [[ADD15:%.*]] = add i32 [[TMP17]], 1
 // CHECK2-NEXT:    store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK2-NEXT:    store i32 [[ADD16]], ptr [[K]], align 4
 // CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
 // CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK2-NEXT:    store i32 [[ADD16]], ptr [[K]], align 4
-// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4
-// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT:    [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK2-NEXT:    [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
 // CHECK2-NEXT:    store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT:    [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
 // CHECK2-NEXT:    store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
-// CHECK2-NEXT:    [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT:    store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4
-// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
-// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK2-NEXT:    [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT:    [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]]
 // CHECK2-NEXT:    [[SUB24:%.*]] = sub i32 [[SUB23]], 1
-// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK2-NEXT:    [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]]
-// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK2-NEXT:    [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]]
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT:    [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]]
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT:    [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]]
 // CHECK2-NEXT:    [[SUB27:%.*]] = sub i32 [[DIV26]], 1
 // CHECK2-NEXT:    store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK2-NEXT:    store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_LB2]], align 4
 // CHECK2-NEXT:    store i32 1, ptr [[DOTOMP_ST2]], align 4
-// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK2-NEXT:    [[ADD28:%.*]] = add i32 [[TMP32]], 1
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT:    [[ADD28:%.*]] = add i32 [[TMP29]], 1
 // CHECK2-NEXT:    store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
-// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT:    store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK2-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK2-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]]
 // CHECK2-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
 // CHECK2:       [[COND_TRUE]]:
-// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
 // CHECK2-NEXT:    br label %[[COND_END:.*]]
 // CHECK2:       [[COND_FALSE]]:
-// CHECK2-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
 // CHECK2-NEXT:    br label %[[COND_END]]
 // CHECK2:       [[COND_END]]:
-// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ]
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ]
 // CHECK2-NEXT:    store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
-// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
-// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
-// CHECK2-NEXT:    [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]]
+// CHECK2-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT:    [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]]
 // CHECK2-NEXT:    br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
 // CHECK2:       [[COND_TRUE30]]:
-// CHECK2-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
 // CHECK2-NEXT:    br label %[[COND_END32:.*]]
 // CHECK2:       [[COND_FALSE31]]:
-// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
 // CHECK2-NEXT:    br label %[[COND_END32]]
 // CHECK2:       [[COND_END32]]:
-// CHECK2-NEXT:    [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ]
+// CHECK2-NEXT:    [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ]
 // CHECK2-NEXT:    store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
 // CHECK2-NEXT:    br label %[[FOR_COND:.*]]
 // CHECK2:       [[FOR_COND]]:
-// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
-// CHECK2-NEXT:    [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT:    [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]]
 // CHECK2-NEXT:    br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
 // CHECK2:       [[FOR_BODY]]:
-// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]]
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]]
 // CHECK2-NEXT:    br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
 // CHECK2:       [[IF_THEN]]:
-// CHECK2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT:    [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]]
-// CHECK2-NEXT:    [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]]
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]]
+// CHECK2-NEXT:    [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]]
 // CHECK2-NEXT:    store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
-// CHECK2-NEXT:    [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]]
-// CHECK2-NEXT:    [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]]
+// CHECK2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT:    [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT:    [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK2-NEXT:    [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]]
 // CHECK2-NEXT:    store i32 [[ADD38]], ptr [[I]], align 4
-// CHECK2-NEXT:    [[TMP52:%.*]] = load i32, ptr [[I]], align 4
-// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP52]])
+// CHECK2-NEXT:    [[TMP49:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP49]])
 // CHECK2-NEXT:    br label %[[IF_END]]
 // CHECK2:       [[IF_END]]:
-// CHECK2-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT:    [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT:    [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]]
 // CHECK2-NEXT:    br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
 // CHECK2:       [[IF_THEN40]]:
-// CHECK2-NEXT:    [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK2-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]]
-// CHECK2-NEXT:    [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]]
+// CHECK2-NEXT:    [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT:    [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]]
 // CHECK2-NEXT:    store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT:    [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT:    [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT:    [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]]
-// CHECK2-NEXT:    [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]]
+// CHECK2-NEXT:    [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT:    [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT:    [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]]
 // CHECK2-NEXT:    store i32 [[SUB44]], ptr [[J]], align 4
-// CHECK2-NEXT:    [[TMP61:%.*]] = load i32, ptr [[J]], align 4
-// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP61]])
+// CHECK2-NEXT:    [[TMP58:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP58]])
 // CHECK2-NEXT:    br label %[[IF_END45]]
 // CHECK2:       [[IF_END45]]:
-// CHECK2-NEXT:    [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
-// CHECK2-NEXT:    [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]]
+// CHECK2-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT:    [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]]
 // CHECK2-NEXT:    br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
 // CHECK2:       [[IF_THEN47]]:
-// CHECK2-NEXT:    [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
-// CHECK2-NEXT:    [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
-// CHECK2-NEXT:    [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]]
-// CHECK2-NEXT:    [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]]
+// CHECK2-NEXT:    [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT:    [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT:    [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]]
+// CHECK2-NEXT:    [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]]
 // CHECK2-NEXT:    store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
-// CHECK2-NEXT:    [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK2-NEXT:    [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
-// CHECK2-NEXT:    [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK2-NEXT:    [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]]
-// CHECK2-NEXT:    [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]]
+// CHECK2-NEXT:    [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT:    [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT:    [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT:    [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK2-NEXT:    [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]]
 // CHECK2-NEXT:    store i32 [[ADD51]], ptr [[K]], align 4
-// CHECK2-NEXT:    [[TMP70:%.*]] = load i32, ptr [[K]], align 4
-// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP70]])
+// CHECK2-NEXT:    [[TMP67:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP67]])
 // CHECK2-NEXT:    br label %[[IF_END52]]
 // CHECK2:       [[IF_END52]]:
 // CHECK2-NEXT:    br label %[[FOR_INC:.*]]
 // CHECK2:       [[FOR_INC]]:
-// CHECK2-NEXT:    [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    [[INC:%.*]] = add i32 [[TMP71]], 1
+// CHECK2-NEXT:    [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add i32 [[TMP68]], 1
 // CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK2-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
 // CHECK2:       [[FOR_END]]:
 // CHECK2-NEXT:    ret void
 //
@@ -1819,6 +2314,8 @@ extern "C" void foo4() {
 // CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
 // CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
 // CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
+// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]}
+// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]}
 //.
 // CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
 // CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"}
@@ -1826,4 +2323,6 @@ extern "C" void foo4() {
 // CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
 // CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
 // CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
+// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]}
+// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]}
 //.

>From 823bc08b4ef97458665ed41409e03cd07598efd3 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:44:48 +0000
Subject: [PATCH 5/9] Fixed missing diagnostic groups in warnings

---
 clang/include/clang/Basic/DiagnosticSemaKinds.td | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 191618e7865dc..a6ae0de004c8a 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11559,7 +11559,8 @@ def note_omp_implicit_dsa : Note<
 def err_omp_loop_var_dsa : Error<
   "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">;
 def warn_omp_different_loop_ind_var_types : Warning <
-  "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">;
+  "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">,
+  InGroup<OpenMPLoopForm>;
 def err_omp_not_canonical_loop : Error <
   "loop after '#pragma omp %0' is not in canonical form">;
 def err_omp_not_a_loop_sequence : Error < 
@@ -11570,7 +11571,8 @@ def err_omp_invalid_looprange : Error <
   "loop range in '#pragma omp %0' exceeds the number of available loops: "
   "range end '%1' is greater than the total number of loops '%2'">;
 def warn_omp_redundant_fusion : Warning <
-  "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">;
+  "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">,
+  InGroup<OpenMPClauses>;
 def err_omp_not_for : Error<
   "%select{statement after '#pragma omp %1' must be a for loop|"
   "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">;

>From 422ffd7ef80a83156037a34c6ad955e67c504b4d Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:49:50 +0000
Subject: [PATCH 6/9] Fixed formatting and comments

---
 clang/lib/Sema/SemaOpenMP.cpp | 112 ++++++++++++++++++----------------
 1 file changed, 58 insertions(+), 54 deletions(-)

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index b0529c9352c83..485eebf23ef93 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -14160,42 +14160,43 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective(
 }
 
 // Overloaded base case function
-template <typename T, typename F>
-static bool tryHandleAs(T *t, F &&) {
-    return false; 
+template <typename T, typename F> static bool tryHandleAs(T *t, F &&) {
+  return false;
 }
 
 /**
- * Tries to recursively cast `t` to one of the given types and invokes `f` if successful.
+ * Tries to recursively cast `t` to one of the given types and invokes `f` if
+ * successful.
  *
  * @tparam Class The first type to check.
  * @tparam Rest The remaining types to check.
  * @tparam T The base type of `t`.
- * @tparam F The callable type for the function to invoke upon a successful cast.
+ * @tparam F The callable type for the function to invoke upon a successful
+ * cast.
  * @param t The object to be checked.
  * @param f The function to invoke if `t` matches `Class`.
  * @return `true` if `t` matched any type and `f` was called, otherwise `false`.
  */
 template <typename Class, typename... Rest, typename T, typename F>
 static bool tryHandleAs(T *t, F &&f) {
-    if (Class *c = dyn_cast<Class>(t)) {
-        f(c); 
-        return true;
-    } else {
-        return tryHandleAs<Rest...>(t, std::forward<F>(f));
-    }
+  if (Class *c = dyn_cast<Class>(t)) {
+    f(c);
+    return true;
+  } else {
+    return tryHandleAs<Rest...>(t, std::forward<F>(f));
+  }
 }
 
 // Updates OriginalInits by checking Transform against loop transformation
 // directives and appending their pre-inits if a match is found.
 static void updatePreInits(OMPLoopBasedDirective *Transform,
                            SmallVectorImpl<SmallVector<Stmt *, 0>> &PreInits) {
-    if (!tryHandleAs<OMPTileDirective, OMPUnrollDirective, OMPReverseDirective,
-                     OMPInterchangeDirective, OMPFuseDirective>(
-            Transform, [&PreInits](auto *Dir) {
-              appendFlattenedStmtList(PreInits.back(), Dir->getPreInits());
-            }))
-        llvm_unreachable("Unhandled loop transformation");
+  if (!tryHandleAs<OMPTileDirective, OMPUnrollDirective, OMPReverseDirective,
+                   OMPInterchangeDirective, OMPFuseDirective>(
+          Transform, [&PreInits](auto *Dir) {
+            appendFlattenedStmtList(PreInits.back(), Dir->getPreInits());
+          }))
+    llvm_unreachable("Unhandled loop transformation");
 }
 
 bool SemaOpenMP::checkTransformableLoopNest(
@@ -14273,43 +14274,42 @@ class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor {
   unsigned getNestedLoopCount() const { return NestedLoopCount; }
 
   bool VisitForStmt(ForStmt *FS) override {
-        ++NestedLoopCount;
-        return true;
+    ++NestedLoopCount;
+    return true;
   }
 
   bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override {
-        ++NestedLoopCount;
-        return true;
+    ++NestedLoopCount;
+    return true;
   }
 
   bool TraverseStmt(Stmt *S) override {
-        if (!S)
+    if (!S)
       return true;
 
-        // Skip traversal of all expressions, including special cases like
-        // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions
-        // may contain inner statements (and even loops), but they are not part
-        // of the syntactic body of the surrounding loop structure.
-        //  Therefore must not be counted
-        if (isa<Expr>(S))
+    // Skip traversal of all expressions, including special cases like
+    // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions
+    // may contain inner statements (and even loops), but they are not part
+    // of the syntactic body of the surrounding loop structure.
+    //  Therefore must not be counted
+    if (isa<Expr>(S))
       return true;
 
-        // Only recurse into CompoundStmt (block {}) and loop bodies
-        if (isa<CompoundStmt>(S) || isa<ForStmt>(S) ||
-            isa<CXXForRangeStmt>(S)) {
+    // Only recurse into CompoundStmt (block {}) and loop bodies
+    if (isa<CompoundStmt>(S) || isa<ForStmt>(S) || isa<CXXForRangeStmt>(S)) {
       return DynamicRecursiveASTVisitor::TraverseStmt(S);
-        }
+    }
 
-        // Stop traversal of the rest of statements, that break perfect
-        // loop nesting, such as control flow (IfStmt, SwitchStmt...)
-        return true;
+    // Stop traversal of the rest of statements, that break perfect
+    // loop nesting, such as control flow (IfStmt, SwitchStmt...)
+    return true;
   }
 
   bool TraverseDecl(Decl *D) override {
-        // Stop in the case of finding a declaration, it is not important
-        // in order to find nested loops (Possible CXXRecordDecl, RecordDecl,
-        // FunctionDecl...)
-        return true;
+    // Stop in the case of finding a declaration, it is not important
+    // in order to find nested loops (Possible CXXRecordDecl, RecordDecl,
+    // FunctionDecl...)
+    return true;
   }
 };
 
@@ -14467,15 +14467,14 @@ bool SemaOpenMP::analyzeLoopSequence(
     return isa<OMPLoopTransformationDirective>(Child);
   };
 
-
   // High level grammar validation
   for (auto *Child : LoopSeqStmt->children()) {
 
-        if (!Child)
+    if (!Child)
       continue;
 
-        // Skip over non-loop-sequence statements
-        if (!isLoopSequenceDerivation(Child)) {
+    // Skip over non-loop-sequence statements
+    if (!isLoopSequenceDerivation(Child)) {
       Child = Child->IgnoreContainers();
 
       // Ignore empty compound statement
@@ -14493,9 +14492,9 @@ bool SemaOpenMP::analyzeLoopSequence(
         // Already been treated, skip this children
         continue;
       }
-        }
-        // Regular loop sequence handling
-        if (isLoopSequenceDerivation(Child)) {
+    }
+    // Regular loop sequence handling
+    if (isLoopSequenceDerivation(Child)) {
       if (isLoopGeneratingStmt(Child)) {
         if (!analyzeLoopGeneration(Child)) {
           return false;
@@ -14509,12 +14508,12 @@ bool SemaOpenMP::analyzeLoopSequence(
         // Update the Loop Sequence size by one
         ++LoopSeqSize;
       }
-        } else {
+    } else {
       // Report error for invalid statement inside canonical loop sequence
       Diag(Child->getBeginLoc(), diag::err_omp_not_for)
           << 0 << getOpenMPDirectiveName(Kind);
       return false;
-        }
+    }
   }
   return true;
 }
@@ -14531,9 +14530,9 @@ bool SemaOpenMP::checkTransformableLoopSequence(
 
   // Checks whether the given statement is a compound statement
   if (!isa<CompoundStmt>(AStmt)) {
-        Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
-            << getOpenMPDirectiveName(Kind);
-        return false;
+    Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
+        << getOpenMPDirectiveName(Kind);
+    return false;
   }
   // Number of top level canonical loop nests observed (And acts as index)
   LoopSeqSize = 0;
@@ -14564,7 +14563,7 @@ bool SemaOpenMP::checkTransformableLoopSequence(
                            OriginalInits, TransformsPreInits,
                            LoopSequencePreInits, LoopCategories, Context,
                            Kind)) {
-        return false;
+    return false;
   }
   if (LoopSeqSize <= 0) {
     Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence)
@@ -15278,7 +15277,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
   Stmt *LoopStmt = nullptr;
   collectLoopStmts(AStmt, {LoopStmt});
 
-  // Determine the PreInit declarations.e
+  // Determine the PreInit declarations.
   SmallVector<Stmt *, 4> PreInits;
   addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits);
 
@@ -15894,13 +15893,18 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
     CountVal = CountInt.getZExtValue();
   };
 
-  // Checks if the loop range is valid
+  // OpenMP [6.0, Restrictions]
+  // first + count - 1 must not evaluate to a value greater than the
+  // loop sequence length of the associated canonical loop sequence.
   auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal,
                            unsigned NumLoops) -> bool {
     return FirstVal + CountVal - 1 <= NumLoops;
   };
   uint64_t FirstVal = 1, CountVal = 0, LastVal = LoopSeqSize;
 
+  // Validates the loop range after evaluating the semantic information
+  // and ensures that the range is valid for the given loop sequence size.
+  // Expressions are evaluated at compile time to obtain constant values.
   if (LRC) {
     EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal,
                                CountVal);

>From ac0d9e348109f742440003945d278a9c26f56976 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:58:54 +0000
Subject: [PATCH 7/9] Added minimal changes to enable future flang
 implementation

---
 flang/include/flang/Parser/dump-parse-tree.h | 1 +
 flang/include/flang/Parser/parse-tree.h      | 9 +++++++++
 flang/lib/Lower/OpenMP/Clauses.cpp           | 5 +++++
 flang/lib/Lower/OpenMP/Clauses.h             | 1 +
 flang/lib/Parser/openmp-parsers.cpp          | 7 +++++++
 flang/lib/Parser/unparse.cpp                 | 7 +++++++
 flang/lib/Semantics/check-omp-structure.cpp  | 9 +++++++++
 llvm/include/llvm/Frontend/OpenMP/OMP.td     | 1 +
 8 files changed, 40 insertions(+)

diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index df9278697346f..c220c4dafb52f 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -609,6 +609,7 @@ class ParseTreeDumper {
   NODE(OmpLinearClause, Modifier)
   NODE(parser, OmpLinearModifier)
   NODE_ENUM(OmpLinearModifier, Value)
+  NODE(parser, OmpLoopRangeClause)
   NODE(parser, OmpStepComplexModifier)
   NODE(parser, OmpStepSimpleModifier)
   NODE(parser, OmpLoopDirective)
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index 254236b510544..be80141b49e2b 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4361,6 +4361,15 @@ struct OmpLinearClause {
   std::tuple<OmpObjectList, MODIFIERS(), /*PostModified=*/bool> t;
 };
 
+// Ref: [6.0:207-208]
+//
+// loop-range-clause ->
+//    LOOPRANGE(first, count)                       // since 6.0
+struct OmpLoopRangeClause {
+  TUPLE_CLASS_BOILERPLATE(OmpLoopRangeClause);
+  std::tuple<ScalarIntConstantExpr, ScalarIntConstantExpr> t;
+};
+
 // Ref: [4.5:216-219], [5.0:315-324], [5.1:347-355], [5.2:150-158]
 //
 // map-clause ->
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index f3088b18b77ff..ea535ab3adbe7 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -998,6 +998,11 @@ Link make(const parser::OmpClause::Link &inp,
   return Link{/*List=*/makeObjects(inp.v, semaCtx)};
 }
 
+LoopRange make(const parser::OmpClause::Looprange &inp,
+            semantics::SemanticsContext &semaCtx) {
+  llvm_unreachable("Unimplemented: looprange");
+}
+
 Map make(const parser::OmpClause::Map &inp,
          semantics::SemanticsContext &semaCtx) {
   // inp.v -> parser::OmpMapClause
diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h
index d7ab21d428e32..bda8571e65f23 100644
--- a/flang/lib/Lower/OpenMP/Clauses.h
+++ b/flang/lib/Lower/OpenMP/Clauses.h
@@ -239,6 +239,7 @@ using Initializer = tomp::clause::InitializerT<TypeTy, IdTy, ExprTy>;
 using InReduction = tomp::clause::InReductionT<TypeTy, IdTy, ExprTy>;
 using IsDevicePtr = tomp::clause::IsDevicePtrT<TypeTy, IdTy, ExprTy>;
 using Lastprivate = tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy>;
+using LoopRange = tomp::clause::LoopRangeT<TypeTy, IdTy, ExprTy>;
 using Linear = tomp::clause::LinearT<TypeTy, IdTy, ExprTy>;
 using Link = tomp::clause::LinkT<TypeTy, IdTy, ExprTy>;
 using Map = tomp::clause::MapT<TypeTy, IdTy, ExprTy>;
diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp
index 52d3a5844c969..393dbe8ada002 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -841,6 +841,11 @@ TYPE_PARSER(
         maybe(":"_tok >> nonemptyList(Parser<OmpLinearClause::Modifier>{})),
         /*PostModified=*/pure(true)))
 
+TYPE_PARSER(
+  construct<OmpLoopRangeClause>(scalarIntConstantExpr,
+                                "," >> scalarIntConstantExpr)
+)
+
 // OpenMPv5.2 12.5.2 detach-clause -> DETACH (event-handle)
 TYPE_PARSER(construct<OmpDetachClause>(Parser<OmpObject>{}))
 
@@ -1010,6 +1015,8 @@ TYPE_PARSER( //
                     parenthesized(Parser<OmpLinearClause>{}))) ||
     "LINK" >> construct<OmpClause>(construct<OmpClause::Link>(
                   parenthesized(Parser<OmpObjectList>{}))) ||
+    "LOOPRANGE" >> construct<OmpClause>(construct<OmpClause::Looprange>(
+                  parenthesized(Parser<OmpLoopRangeClause>{}))) ||
     "MAP" >> construct<OmpClause>(construct<OmpClause::Map>(
                  parenthesized(Parser<OmpMapClause>{}))) ||
     "MATCH" >> construct<OmpClause>(construct<OmpClause::Match>(
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index a626888b7dfe5..00b5a8c0600e1 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2314,6 +2314,13 @@ class UnparseVisitor {
       }
     }
   }
+  void Unparse(const OmpLoopRangeClause &x) {
+    Word("LOOPRANGE(");
+    Walk(std::get<0>(x.t));
+    Put(", ");
+    Walk(std::get<1>(x.t));
+    Put(")");
+  }
   void Unparse(const OmpReductionClause &x) {
     using Modifier = OmpReductionClause::Modifier;
     Walk(std::get<std::optional<std::list<Modifier>>>(x.t), ": ");
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index 606014276e7ca..4af2b4909fcb6 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -3383,6 +3383,15 @@ CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Collapse, OMPC_collapse)
 CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Safelen, OMPC_safelen)
 CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Simdlen, OMPC_simdlen)
 
+void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) {
+  context_.Say(GetContext().clauseSource,
+      "LOOPRANGE clause is not implemented yet"_err_en_US,
+      ContextDirectiveAsFortran());
+}
+
+void OmpStructureChecker::Enter(const parser::OmpClause::FreeAgent &x) {
+  context_.Say(GetContext().clauseSource,
+      "FREE_AGENT clause is not implemented yet"_err_en_US,
 // Restrictions specific to each clause are implemented apart from the
 // generalized restrictions.
 
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index ae19385c022d0..3be758686c634 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -273,6 +273,7 @@ def OMPC_Link : Clause<"link"> {
 }
 def OMPC_LoopRange : Clause<"looprange"> {
   let clangClass = "OMPLoopRangeClause";
+  let flangClass = "OmpLoopRangeClause";
 }
 def OMPC_Map : Clause<"map"> {
   let clangClass = "OMPMapClause";

>From e6e00ae563e491968637e00d2a15a7272bc9d146 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Wed, 21 May 2025 13:14:22 +0000
Subject: [PATCH 8/9] Address basic PR feedback

---
 clang/include/clang/AST/OpenMPClause.h      |  93 ++++----
 clang/include/clang/AST/StmtOpenMP.h        |   3 +-
 clang/include/clang/Sema/SemaOpenMP.h       |  14 +-
 clang/lib/AST/OpenMPClause.cpp              |  17 +-
 clang/lib/CodeGen/CGExpr.cpp                |   5 +-
 clang/lib/CodeGen/CodeGenFunction.h         |   4 -
 clang/lib/Sema/SemaOpenMP.cpp               | 224 +++++++++-----------
 flang/lib/Semantics/check-omp-structure.cpp |   3 -
 8 files changed, 166 insertions(+), 197 deletions(-)

diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index 8f937cdef9cd0..3df5133a17fb4 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -1153,82 +1153,73 @@ class OMPFullClause final : public OMPNoChildClause<llvm::omp::OMPC_full> {
 ///   for(int j = 0; j < 256; j+=2)
 ///   for(int k = 127; k >= 0; --k)
 /// \endcode
-class OMPLoopRangeClause final : public OMPClause {
+class OMPLoopRangeClause final
+    : public OMPClause,
+      private llvm::TrailingObjects<OMPLoopRangeClause, Expr *> {
   friend class OMPClauseReader;
-
-  explicit OMPLoopRangeClause()
-      : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {}
+  friend class llvm::TrailingObjects<OMPLoopRangeClause, Expr *>;
 
   /// Location of '('
   SourceLocation LParenLoc;
 
-  /// Location of 'first'
-  SourceLocation FirstLoc;
-
-  /// Location of 'count'
-  SourceLocation CountLoc;
-
-  /// Expr associated with 'first' argument
-  Expr *First = nullptr;
-
-  /// Expr associated with 'count' argument
-  Expr *Count = nullptr;
-
-  /// Set 'first'
-  void setFirst(Expr *First) { this->First = First; }
+  /// Location of first and count expressions
+  SourceLocation FirstLoc, CountLoc;
 
-  /// Set 'count'
-  void setCount(Expr *Count) { this->Count = Count; }
+  /// Number of looprange arguments (always 2: first, count)
+  unsigned NumArgs = 2;
 
-  /// Set location of '('.
-  void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
-
-  /// Set location of 'first' argument
-  void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; }
+  /// Set the argument expressions.
+  void setArgs(ArrayRef<Expr *> Args) {
+    assert(Args.size() == NumArgs && "Expected exactly 2 looprange arguments");
+    std::copy(Args.begin(), Args.end(), getTrailingObjects<Expr *>());
+  }
 
-  /// Set location of 'count' argument
-  void setCountLoc(SourceLocation Loc) { CountLoc = Loc; }
+  /// Build an empty clause for deserialization.
+  explicit OMPLoopRangeClause()
+      : OMPClause(llvm::omp::OMPC_looprange, {}, {}), NumArgs(2) {}
 
 public:
-  /// Build an AST node for a 'looprange' clause
-  ///
-  /// \param StartLoc     Starting location of the clause.
-  /// \param LParenLoc    Location of '('.
-  /// \param ModifierLoc  Modifier location.
-  /// \param
+  /// Build a 'looprange' clause AST node.
   static OMPLoopRangeClause *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
          SourceLocation FirstLoc, SourceLocation CountLoc,
-         SourceLocation EndLoc, Expr *First, Expr *Count);
+         SourceLocation EndLoc, ArrayRef<Expr *> Args);
 
-  /// Build an empty 'looprange' node for deserialization
-  ///
-  /// \param C      Context of the AST.
+  /// Build an empty 'looprange' clause node.
   static OMPLoopRangeClause *CreateEmpty(const ASTContext &C);
 
-  /// Returns the location of '('
+  // Location getters/setters
   SourceLocation getLParenLoc() const { return LParenLoc; }
-
-  /// Returns the location of 'first'
   SourceLocation getFirstLoc() const { return FirstLoc; }
-
-  /// Returns the location of 'count'
   SourceLocation getCountLoc() const { return CountLoc; }
 
-  /// Returns the argument 'first' or nullptr if not set
-  Expr *getFirst() const { return cast_or_null<Expr>(First); }
+  void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+  void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; }
+  void setCountLoc(SourceLocation Loc) { CountLoc = Loc; }
 
-  /// Returns the argument 'count' or nullptr if not set
-  Expr *getCount() const { return cast_or_null<Expr>(Count); }
+  /// Get looprange arguments: first and count
+  Expr *getFirst() const { return getArgs()[0]; }
+  Expr *getCount() const { return getArgs()[1]; }
 
-  child_range children() {
-    return child_range(reinterpret_cast<Stmt **>(&First),
-                       reinterpret_cast<Stmt **>(&Count) + 1);
+  /// Set looprange arguments: first and count
+  void setFirst(Expr *E) { getArgs()[0] = E; }
+  void setCount(Expr *E) { getArgs()[1] = E; }
+
+  MutableArrayRef<Expr *> getArgs() {
+    return MutableArrayRef<Expr *>(getTrailingObjects<Expr *>(), NumArgs);
+  }
+  ArrayRef<Expr *> getArgs() const {
+    return ArrayRef<Expr *>(getTrailingObjects<Expr *>(), NumArgs);
   }
 
+  child_range children() {
+    return child_range(reinterpret_cast<Stmt **>(getArgs().begin()),
+                       reinterpret_cast<Stmt **>(getArgs().end()));
+  }
   const_child_range children() const {
-    auto Children = const_cast<OMPLoopRangeClause *>(this)->children();
-    return const_child_range(Children.begin(), Children.end());
+    auto AR = getArgs();
+    return const_child_range(reinterpret_cast<Stmt *const *>(AR.begin()),
+                             reinterpret_cast<Stmt *const *>(AR.end()));
   }
 
   child_range used_children() {
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index b6a948a8c6020..cb871c9894d01 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -5807,7 +5807,6 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective {
                                        llvm::omp::OMPD_reverse, StartLoc,
                                        EndLoc, 1) {
     // Reverse produces a single top-level canonical loop nest
-    setNumGeneratedLoops(1);
     setNumGeneratedLoopNests(1);
   }
 
@@ -5878,7 +5877,7 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
                                        EndLoc, NumLoops) {
     // Interchange produces a single top-level canonical loop
     // nest, with the exact same amount of total loops
-    setNumGeneratedLoops(NumLoops);
+    setNumGeneratedLoops(3 * NumLoops);
     setNumGeneratedLoopNests(1);
   }
 
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index ac4cbe3709a0d..35bb884c0c1f2 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -1491,7 +1491,7 @@ class SemaOpenMP : public SemaBase {
   bool checkTransformableLoopNest(
       OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
       SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
-      Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits);
+      Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits);
 
   /// @brief Categories of loops encountered during semantic OpenMP loop
   /// analysis
@@ -1554,9 +1554,9 @@ class SemaOpenMP : public SemaBase {
       Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops,
       SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
       SmallVectorImpl<Stmt *> &ForStmts,
-      SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
-      SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
-      SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+      SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits,
+      SmallVectorImpl<SmallVector<Stmt *>> &TransformsPreInits,
+      SmallVectorImpl<SmallVector<Stmt *>> &LoopSequencePreInits,
       SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context,
       OpenMPDirectiveKind Kind);
 
@@ -1590,9 +1590,9 @@ class SemaOpenMP : public SemaBase {
       unsigned &NumLoops,
       SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
       SmallVectorImpl<Stmt *> &ForStmts,
-      SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
-      SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
-      SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+      SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits,
+      SmallVectorImpl<SmallVector<Stmt *>> &TransformsPreInits,
+      SmallVectorImpl<SmallVector<Stmt *>> &LoopSequencePreInits,
       SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context);
 
   /// Helper to keep information about the current `omp begin/end declare
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 0b5808eb100e4..e0570262b2a05 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -1026,22 +1026,25 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) {
 
 OMPLoopRangeClause *
 OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc,
-                           SourceLocation LParenLoc, SourceLocation EndLoc,
-                           SourceLocation FirstLoc, SourceLocation CountLoc,
-                           Expr *First, Expr *Count) {
+                           SourceLocation LParenLoc, SourceLocation FirstLoc,
+                           SourceLocation CountLoc, SourceLocation EndLoc,
+                           ArrayRef<Expr *> Args) {
+
+  assert(Args.size() == 2 &&
+         "looprange clause must have exactly two arguments");
   OMPLoopRangeClause *Clause = CreateEmpty(C);
   Clause->setLocStart(StartLoc);
   Clause->setLParenLoc(LParenLoc);
-  Clause->setLocEnd(EndLoc);
   Clause->setFirstLoc(FirstLoc);
   Clause->setCountLoc(CountLoc);
-  Clause->setFirst(First);
-  Clause->setCount(Count);
+  Clause->setLocEnd(EndLoc);
+  Clause->setArgs(Args);
   return Clause;
 }
 
 OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) {
-  return new (C) OMPLoopRangeClause();
+  void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(2));
+  return new (Mem) OMPLoopRangeClause();
 }
 
 OMPAllocateClause *OMPAllocateClause::Create(
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 1671f07bc2760..268e4220b05b6 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3241,11 +3241,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
           var, ConvertTypeForMem(VD->getType()), getContext().getDeclAlign(VD));
 
     // No other cases for now.
-    } else {
-      llvm::dbgs() << "THE DAMN DECLREFEXPR HASN'T BEEN ENTERED IN LOCALDECLMAP\n";
-      VD->dumpColor();
+    } else
       llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?");
-    }
 
     // Handle threadlocal function locals.
     if (VD->getTLSKind() != VarDecl::TLS_None)
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index ce00198c396b6..a983901f560de 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -5414,10 +5414,6 @@ class CodeGenFunction : public CodeGenTypeCache {
 
   /// Set the address of a local variable.
   void setAddrOfLocalVar(const VarDecl *VD, Address Addr) {
-    if (LocalDeclMap.count(VD)) {
-      llvm::errs() << "Warning: VarDecl already exists in map: ";
-      VD->dumpColor(); 
-    }
     assert(!LocalDeclMap.count(VD) && "Decl already exists in LocalDeclMap!");
     LocalDeclMap.insert({VD, Addr});
   }
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 485eebf23ef93..d2da417e5cfde 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -14159,38 +14159,37 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective(
       getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
 }
 
-// Overloaded base case function
+/// Overloaded base case function
 template <typename T, typename F> static bool tryHandleAs(T *t, F &&) {
   return false;
 }
 
-/**
- * Tries to recursively cast `t` to one of the given types and invokes `f` if
- * successful.
- *
- * @tparam Class The first type to check.
- * @tparam Rest The remaining types to check.
- * @tparam T The base type of `t`.
- * @tparam F The callable type for the function to invoke upon a successful
- * cast.
- * @param t The object to be checked.
- * @param f The function to invoke if `t` matches `Class`.
- * @return `true` if `t` matched any type and `f` was called, otherwise `false`.
- */
+///
+/// Tries to recursively cast `t` to one of the given types and invokes `f` if
+/// successful.
+///
+/// @tparam Class The first type to check.
+/// @tparam Rest The remaining types to check.
+/// @tparam T The base type of `t`.
+/// @tparam F The callable type for the function to invoke upon a successful
+/// cast.
+/// @param t The object to be checked.
+/// @param f The function to invoke if `t` matches `Class`.
+/// @return `true` if `t` matched any type and `f` was called, otherwise
+/// `false`.
 template <typename Class, typename... Rest, typename T, typename F>
 static bool tryHandleAs(T *t, F &&f) {
   if (Class *c = dyn_cast<Class>(t)) {
     f(c);
     return true;
-  } else {
-    return tryHandleAs<Rest...>(t, std::forward<F>(f));
   }
+  return tryHandleAs<Rest...>(t, std::forward<F>(f));
 }
 
-// Updates OriginalInits by checking Transform against loop transformation
-// directives and appending their pre-inits if a match is found.
+/// Updates OriginalInits by checking Transform against loop transformation
+/// directives and appending their pre-inits if a match is found.
 static void updatePreInits(OMPLoopBasedDirective *Transform,
-                           SmallVectorImpl<SmallVector<Stmt *, 0>> &PreInits) {
+                           SmallVectorImpl<SmallVector<Stmt *>> &PreInits) {
   if (!tryHandleAs<OMPTileDirective, OMPUnrollDirective, OMPReverseDirective,
                    OMPInterchangeDirective, OMPFuseDirective>(
           Transform, [&PreInits](auto *Dir) {
@@ -14202,7 +14201,7 @@ static void updatePreInits(OMPLoopBasedDirective *Transform,
 bool SemaOpenMP::checkTransformableLoopNest(
     OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
     SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
-    Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits) {
+    Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits) {
   OriginalInits.emplace_back();
   bool Result = OMPLoopBasedDirective::doForAllLoops(
       AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false, NumLoops,
@@ -14236,40 +14235,40 @@ bool SemaOpenMP::checkTransformableLoopNest(
   return Result;
 }
 
-// Counts the total number of nested loops, including the outermost loop (the
-// original loop). PRECONDITION of this visitor is that it must be invoked from
-// the original loop to be analyzed. The traversal is stop for Decl's and
-// Expr's given that they may contain inner loops that must not be counted.
-//
-// Example AST structure for the code:
-//
-// int main() {
-//     #pragma omp fuse
-//     {
-//         for (int i = 0; i < 100; i++) {    <-- Outer loop
-//             []() {
-//                 for(int j = 0; j < 100; j++) {}  <-- NOT A LOOP
-//             };
-//             for(int j = 0; j < 5; ++j) {}    <-- Inner loop
-//         }
-//         for (int r = 0; i < 100; i++) {    <-- Outer loop
-//             struct LocalClass {
-//                 void bar() {
-//                     for(int j = 0; j < 100; j++) {}  <-- NOT A LOOP
-//                 }
-//             };
-//             for(int k = 0; k < 10; ++k) {}    <-- Inner loop
-//             {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP
-//         }
-//     }
-// }
-// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops
+/// Counts the total number of nested loops, including the outermost loop (the
+/// original loop). PRECONDITION of this visitor is that it must be invoked from
+/// the original loop to be analyzed. The traversal stops at Decl's and
+/// Expr's given that they may contain inner loops that must not be counted.
+///
+/// Example AST structure for the code:
+///
+/// int main() {
+///     #pragma omp fuse
+///     {
+///         for (int i = 0; i < 100; i++) {    <-- Outer loop
+///             []() {
+///                 for(int j = 0; j < 100; j++) {}  <-- NOT A LOOP
+///             };
+///             for(int j = 0; j < 5; ++j) {}    <-- Inner loop
+///         }
+///         for (int r = 0; r < 100; r++) {    <-- Outer loop
+///             struct LocalClass {
+///                 void bar() {
+///                     for(int j = 0; j < 100; j++) {}  <-- NOT A LOOP
+///                 }
+///             };
+///             for(int k = 0; k < 10; ++k) {}    <-- Inner loop
+///             {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP
+///         }
+///     }
+/// }
+/// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops
 class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor {
 private:
   unsigned NestedLoopCount = 0;
 
 public:
-  explicit NestedLoopCounterVisitor() {}
+  explicit NestedLoopCounterVisitor() = default;
 
   unsigned getNestedLoopCount() const { return NestedLoopCount; }
 
@@ -14296,7 +14295,7 @@ class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor {
       return true;
 
     // Only recurse into CompoundStmt (block {}) and loop bodies
-    if (isa<CompoundStmt>(S) || isa<ForStmt>(S) || isa<CXXForRangeStmt>(S)) {
+    if (isa<CompoundStmt, ForStmt, CXXForRangeStmt>(S)) {
       return DynamicRecursiveASTVisitor::TraverseStmt(S);
     }
 
@@ -14317,19 +14316,18 @@ bool SemaOpenMP::analyzeLoopSequence(
     Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops,
     SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
     SmallVectorImpl<Stmt *> &ForStmts,
-    SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
-    SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
-    SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+    SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits,
+    SmallVectorImpl<SmallVector<Stmt *>> &TransformsPreInits,
+    SmallVectorImpl<SmallVector<Stmt *>> &LoopSequencePreInits,
     SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context,
     OpenMPDirectiveKind Kind) {
 
   VarsWithInheritedDSAType TmpDSA;
   QualType BaseInductionVarType;
-  // Helper Lambda to handle storing initialization and body statements for both
-  // ForStmt and CXXForRangeStmt and checks for any possible mismatch between
-  // induction variables types
-  auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType,
-                              this, &Context](Stmt *LoopStmt) {
+  /// Helper Lambda to handle storing initialization and body statements for
+  /// both ForStmt and CXXForRangeStmt and checks for any possible mismatch
+  /// between induction variables types
+  auto StoreLoopStatements = [&](Stmt *LoopStmt) {
     if (auto *For = dyn_cast<ForStmt>(LoopStmt)) {
       OriginalInits.back().push_back(For->getInit());
       ForStmts.push_back(For);
@@ -14357,16 +14355,11 @@ bool SemaOpenMP::analyzeLoopSequence(
     }
   };
 
-  // Helper lambda functions to encapsulate the processing of different
-  // derivations of the canonical loop sequence grammar
-  //
-  // Modularized code for handling loop generation and transformations
-  auto analyzeLoopGeneration = [&storeLoopStatements, &LoopHelpers,
-                                &OriginalInits, &TransformsPreInits,
-                                &LoopCategories, &LoopSeqSize, &NumLoops, Kind,
-                                &TmpDSA, &ForStmts, &Context,
-                                &LoopSequencePreInits, this](Stmt *Child) {
-    auto LoopTransform = dyn_cast<OMPLoopTransformationDirective>(Child);
+  /// Helper lambda functions to encapsulate the processing of different
+  /// derivations of the canonical loop sequence grammar
+  /// Modularized code for handling loop generation and transformations
+  auto AnalyzeLoopGeneration = [&](Stmt *Child) {
+    auto *LoopTransform = dyn_cast<OMPLoopTransformationDirective>(Child);
     Stmt *TransformedStmt = LoopTransform->getTransformedStmt();
     unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests();
     unsigned NumGeneratedLoops = LoopTransform->getNumGeneratedLoops();
@@ -14377,9 +14370,8 @@ bool SemaOpenMP::analyzeLoopSequence(
         LoopSeqSize += NumGeneratedLoopNests;
         NumLoops += NumGeneratedLoops;
         return true;
-      }
-      // Unroll full (0 loops produced)
-      else {
+      } else {
+        // Unroll full (0 loops produced)
         Diag(Child->getBeginLoc(), diag::err_omp_not_for)
             << 0 << getOpenMPDirectiveName(Kind);
         return false;
@@ -14406,9 +14398,8 @@ bool SemaOpenMP::analyzeLoopSequence(
                                  LoopHelpers, ForStmts, OriginalInits,
                                  TransformsPreInits, LoopSequencePreInits,
                                  LoopCategories, Context, Kind);
-    }
-    // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all)
-    else {
+    } else {
+      // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all)
       // Process the transformed loop statement
       OriginalInits.emplace_back();
       TransformsPreInits.emplace_back();
@@ -14424,7 +14415,7 @@ bool SemaOpenMP::analyzeLoopSequence(
             << getOpenMPDirectiveName(Kind);
         return false;
       }
-      storeLoopStatements(TransformedStmt);
+      StoreLoopStatements(TransformedStmt);
       updatePreInits(LoopTransform, TransformsPreInits);
 
       NumLoops += NumGeneratedLoops;
@@ -14433,10 +14424,8 @@ bool SemaOpenMP::analyzeLoopSequence(
     }
   };
 
-  // Modularized code for handling regular canonical loops
-  auto analyzeRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits,
-                             &LoopSeqSize, &NumLoops, Kind, &TmpDSA,
-                             &LoopCategories, this](Stmt *Child) {
+  /// Modularized code for handling regular canonical loops
+  auto AnalyzeRegularLoop = [&](Stmt *Child) {
     OriginalInits.emplace_back();
     LoopHelpers.emplace_back();
     LoopCategories.push_back(OMPLoopCategory::RegularLoop);
@@ -14451,19 +14440,19 @@ bool SemaOpenMP::analyzeLoopSequence(
       return false;
     }
 
-    storeLoopStatements(Child);
+    StoreLoopStatements(Child);
     auto NLCV = NestedLoopCounterVisitor();
     NLCV.TraverseStmt(Child);
     NumLoops += NLCV.getNestedLoopCount();
     return true;
   };
 
-  // Helper functions to validate canonical loop sequence grammar is valid
-  auto isLoopSequenceDerivation = [](auto *Child) {
-    return isa<ForStmt>(Child) || isa<CXXForRangeStmt>(Child) ||
-           isa<OMPLoopTransformationDirective>(Child);
+  /// Helper functions to validate loop sequence grammar derivations
+  auto IsLoopSequenceDerivation = [](auto *Child) {
+    return isa<ForStmt, CXXForRangeStmt, OMPLoopTransformationDirective>(Child);
   };
-  auto isLoopGeneratingStmt = [](auto *Child) {
+  /// Helper functions to validate loop generating grammar derivations
+  auto IsLoopGeneratingStmt = [](auto *Child) {
     return isa<OMPLoopTransformationDirective>(Child);
   };
 
@@ -14474,7 +14463,7 @@ bool SemaOpenMP::analyzeLoopSequence(
       continue;
 
     // Skip over non-loop-sequence statements
-    if (!isLoopSequenceDerivation(Child)) {
+    if (!IsLoopSequenceDerivation(Child)) {
       Child = Child->IgnoreContainers();
 
       // Ignore empty compound statement
@@ -14494,17 +14483,17 @@ bool SemaOpenMP::analyzeLoopSequence(
       }
     }
     // Regular loop sequence handling
-    if (isLoopSequenceDerivation(Child)) {
-      if (isLoopGeneratingStmt(Child)) {
-        if (!analyzeLoopGeneration(Child)) {
+    if (IsLoopSequenceDerivation(Child)) {
+      if (IsLoopGeneratingStmt(Child)) {
+        if (!AnalyzeLoopGeneration(Child))
           return false;
-        }
-        // analyzeLoopGeneration updates Loop Sequence size accordingly
+
+        // AnalyzeLoopGeneration updates Loop Sequence size accordingly
 
       } else {
-        if (!analyzeRegularLoop(Child)) {
+        if (!AnalyzeRegularLoop(Child))
           return false;
-        }
+
         // Update the Loop Sequence size by one
         ++LoopSeqSize;
       }
@@ -14523,9 +14512,9 @@ bool SemaOpenMP::checkTransformableLoopSequence(
     unsigned &NumLoops,
     SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
     SmallVectorImpl<Stmt *> &ForStmts,
-    SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
-    SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
-    SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+    SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits,
+    SmallVectorImpl<SmallVector<Stmt *>> &TransformsPreInits,
+    SmallVectorImpl<SmallVector<Stmt *>> &LoopSequencePreInits,
     SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context) {
 
   // Checks whether the given statement is a compound statement
@@ -14561,10 +14550,9 @@ bool SemaOpenMP::checkTransformableLoopSequence(
   // Recursive entry point to process the main loop sequence
   if (!analyzeLoopSequence(AStmt, LoopSeqSize, NumLoops, LoopHelpers, ForStmts,
                            OriginalInits, TransformsPreInits,
-                           LoopSequencePreInits, LoopCategories, Context,
-                           Kind)) {
+                           LoopSequencePreInits, LoopCategories, Context, Kind))
     return false;
-  }
+
   if (LoopSeqSize <= 0) {
     Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence)
         << getOpenMPDirectiveName(Kind);
@@ -14656,7 +14644,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
   // Verify and diagnose loop nest.
   SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
   Stmt *Body = nullptr;
-  SmallVector<SmallVector<Stmt *, 0>, 4> OriginalInits;
+  SmallVector<SmallVector<Stmt *>, 4> OriginalInits;
   if (!checkTransformableLoopNest(OMPD_tile, AStmt, NumLoops, LoopHelpers, Body,
                                   OriginalInits))
     return StmtError();
@@ -14933,7 +14921,7 @@ StmtResult SemaOpenMP::ActOnOpenMPStripeDirective(ArrayRef<OMPClause *> Clauses,
   // Verify and diagnose loop nest.
   SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
   Stmt *Body = nullptr;
-  SmallVector<SmallVector<Stmt *, 0>, 4> OriginalInits;
+  SmallVector<SmallVector<Stmt *>, 4> OriginalInits;
   if (!checkTransformableLoopNest(OMPD_stripe, AStmt, NumLoops, LoopHelpers,
                                   Body, OriginalInits))
     return StmtError();
@@ -15194,7 +15182,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
   Stmt *Body = nullptr;
   SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers(
       NumLoops);
-  SmallVector<SmallVector<Stmt *, 0>, NumLoops + 1> OriginalInits;
+  SmallVector<SmallVector<Stmt *>, NumLoops + 1> OriginalInits;
   if (!checkTransformableLoopNest(OMPD_unroll, AStmt, NumLoops, LoopHelpers,
                                   Body, OriginalInits))
     return StmtError();
@@ -15462,7 +15450,7 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt,
   Stmt *Body = nullptr;
   SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers(
       NumLoops);
-  SmallVector<SmallVector<Stmt *, 0>, NumLoops + 1> OriginalInits;
+  SmallVector<SmallVector<Stmt *>, NumLoops + 1> OriginalInits;
   if (!checkTransformableLoopNest(OMPD_reverse, AStmt, NumLoops, LoopHelpers,
                                   Body, OriginalInits))
     return StmtError();
@@ -15654,7 +15642,7 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective(
   // Verify and diagnose loop nest.
   SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
   Stmt *Body = nullptr;
-  SmallVector<SmallVector<Stmt *, 0>, 2> OriginalInits;
+  SmallVector<SmallVector<Stmt *>, 2> OriginalInits;
   if (!checkTransformableLoopNest(OMPD_interchange, AStmt, NumLoops,
                                   LoopHelpers, Body, OriginalInits))
     return StmtError();
@@ -15841,9 +15829,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
   CaptureVars CopyTransformer(SemaRef);
 
   // Ensure the structured block is not empty
-  if (!AStmt) {
+  if (!AStmt)
     return StmtError();
-  }
 
   unsigned NumLoops = 1;
   unsigned LoopSeqSize = 1;
@@ -15862,16 +15849,15 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
   // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops
   SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers;
   SmallVector<Stmt *> LoopStmts;
-  SmallVector<SmallVector<Stmt *, 0>> OriginalInits;
-  SmallVector<SmallVector<Stmt *, 0>> TransformsPreInits;
-  SmallVector<SmallVector<Stmt *, 0>> LoopSequencePreInits;
+  SmallVector<SmallVector<Stmt *>> OriginalInits;
+  SmallVector<SmallVector<Stmt *>> TransformsPreInits;
+  SmallVector<SmallVector<Stmt *>> LoopSequencePreInits;
   SmallVector<OMPLoopCategory, 0> LoopCategories;
   if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops,
                                       LoopHelpers, LoopStmts, OriginalInits,
                                       TransformsPreInits, LoopSequencePreInits,
-                                      LoopCategories, Context)) {
+                                      LoopCategories, Context))
     return StmtError();
-  }
 
   // Handle clauses, which can be any of the following: [looprange, apply]
   const OMPLoopRangeClause *LRC =
@@ -15961,9 +15947,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
   // expressions. Generates both the variable declaration and the corresponding
   // initialization statement.
   auto CreateHelperVarAndStmt =
-      [&SemaRef = this->SemaRef, &Context, &CopyTransformer,
-       &IVType](Expr *ExprToCopy, const std::string &BaseName, unsigned I,
-                bool NeedsNewVD = false) {
+      [&, &SemaRef = SemaRef](Expr *ExprToCopy, const std::string &BaseName,
+                              unsigned I, bool NeedsNewVD = false) {
         Expr *TransformedExpr =
             AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy));
         if (!TransformedExpr)
@@ -16007,9 +15992,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
   // Transformations that apply this concept: Loopranged Fuse, Split
   if (!LoopSequencePreInits.empty()) {
     for (const auto &LTPreInits : LoopSequencePreInits) {
-      if (!LTPreInits.empty()) {
+      if (!LTPreInits.empty())
         llvm::append_range(PreInits, LTPreInits);
-      }
     }
   }
 
@@ -16038,9 +16022,9 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
       // Order matters: pre-inits may define variables used in the original
       // inits such as upper bounds...
       auto TransformPreInit = TransformsPreInits[TransformIndex++];
-      if (!TransformPreInit.empty()) {
+      if (!TransformPreInit.empty())
         llvm::append_range(PreInits, TransformPreInit);
-      }
+
       addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
                       PreInits);
     }
@@ -17459,13 +17443,15 @@ OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause(
   if (CountVal.isInvalid())
     Count = nullptr;
 
+  SmallVector<Expr *, 2> ArgsVec = {First, Count};
+
   // OpenMP [6.0, Restrictions]
   // first + count - 1 must not evaluate to a value greater than the
   // loop sequence length of the associated canonical loop sequence.
   // This check must be performed afterwards due to the delayed
   // parsing and computation of the associated loop sequence
   return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc,
-                                    FirstLoc, CountLoc, EndLoc, First, Count);
+                                    FirstLoc, CountLoc, EndLoc, ArgsVec);
 }
 
 OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc,
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index 4af2b4909fcb6..ad4f54e6fdcc5 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -3389,9 +3389,6 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) {
       ContextDirectiveAsFortran());
 }
 
-void OmpStructureChecker::Enter(const parser::OmpClause::FreeAgent &x) {
-  context_.Say(GetContext().clauseSource,
-      "FREE_AGENT clause is not implemented yet"_err_en_US,
 // Restrictions specific to each clause are implemented apart from the
 // generalized restrictions.
 

>From 4100dfe4dd04ed1c953ea4e38a65e867c8e9f73f Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Thu, 22 May 2025 10:39:39 +0000
Subject: [PATCH 9/9] Removed unnecessary warning and updated tests accordingly

---
 .../clang/Basic/DiagnosticSemaKinds.td        |  3 --
 clang/lib/Sema/SemaOpenMP.cpp                 | 21 +--------
 clang/test/OpenMP/fuse_messages.cpp           | 43 +++++++++++++++----
 3 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index a6ae0de004c8a..d1790cea6cc45 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11558,9 +11558,6 @@ def note_omp_implicit_dsa : Note<
   "implicitly determined as %0">;
 def err_omp_loop_var_dsa : Error<
   "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">;
-def warn_omp_different_loop_ind_var_types : Warning <
-  "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">,
-  InGroup<OpenMPLoopForm>;
 def err_omp_not_canonical_loop : Error <
   "loop after '#pragma omp %0' is not in canonical form">;
 def err_omp_not_a_loop_sequence : Error < 
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index d2da417e5cfde..76484b577f9c1 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -14323,31 +14323,12 @@ bool SemaOpenMP::analyzeLoopSequence(
     OpenMPDirectiveKind Kind) {
 
   VarsWithInheritedDSAType TmpDSA;
-  QualType BaseInductionVarType;
   /// Helper Lambda to handle storing initialization and body statements for
-  /// both ForStmt and CXXForRangeStmt and checks for any possible mismatch
-  /// between induction variables types
+  /// both ForStmt and CXXForRangeStmt
   auto StoreLoopStatements = [&](Stmt *LoopStmt) {
     if (auto *For = dyn_cast<ForStmt>(LoopStmt)) {
       OriginalInits.back().push_back(For->getInit());
       ForStmts.push_back(For);
-      // Extract induction variable
-      if (auto *InitStmt = dyn_cast_or_null<DeclStmt>(For->getInit())) {
-        if (auto *InitDecl = dyn_cast<VarDecl>(InitStmt->getSingleDecl())) {
-          QualType InductionVarType = InitDecl->getType().getCanonicalType();
-
-          // Compare with first loop type
-          if (BaseInductionVarType.isNull()) {
-            BaseInductionVarType = InductionVarType;
-          } else if (!Context.hasSameType(BaseInductionVarType,
-                                          InductionVarType)) {
-            Diag(InitDecl->getBeginLoc(),
-                 diag::warn_omp_different_loop_ind_var_types)
-                << getOpenMPDirectiveName(OMPD_fuse) << BaseInductionVarType
-                << InductionVarType;
-          }
-        }
-      }
     } else {
       auto *CXXFor = cast<CXXForRangeStmt>(LoopStmt);
       OriginalInits.back().push_back(CXXFor->getBeginStmt());
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp
index 2a2491d008a0b..4902d424373e5 100644
--- a/clang/test/OpenMP/fuse_messages.cpp
+++ b/clang/test/OpenMP/fuse_messages.cpp
@@ -70,15 +70,6 @@ void func() {
         for(int j = 0; j < 10; ++j);
     }
 
-    //expected-warning at +5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'unsigned int'}}
-    //expected-warning at +5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'long long'}}
-    #pragma omp fuse 
-    {
-        for(int i = 0; i < 10; ++i);
-        for(unsigned int j = 0; j < 10; ++j);
-        for(long long k = 0; k < 100; ++k);
-    }
-
     //expected-warning at +2 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}}
     #pragma omp fuse
     {
@@ -123,6 +114,40 @@ void func() {
         for(int j = 0; j < 100; ++j);
         for(int k = 0; k < 50; ++k);
     }
+
+    //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '6' is greater than the total number of loops '5'}}
+    #pragma omp fuse looprange(1,6)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+        // This fusion results in  2 loops
+        #pragma omp fuse looprange(1,2)
+        {
+            for(int i = 0; i < 10; ++i);
+            for(int j = 0; j < 100; ++j);
+            for(int k = 0; k < 50; ++k);
+        }
+    }
+
+    //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than the total number of loops '3'}}
+    #pragma omp fuse looprange(2,3)
+    {
+        #pragma omp unroll partial(2)
+        for(int i = 0; i < 10; ++i);
+        
+        #pragma omp reverse
+        for(int j = 0; j < 10; ++j);
+
+        #pragma omp fuse 
+        {
+            {
+                #pragma omp reverse
+                for(int j = 0; j < 10; ++j);
+            }            
+            for(int k = 0; k < 50; ++k);
+        }
+    }
 }
 
 // In a template context, but expression itself not instantiation-dependent



More information about the cfe-commits mailing list