[clang] [llvm] [openmp] [Clang][OpenMP] Implement Loop splitting `#pragma omp split` directive (PR #183261)

Amit Tiwari via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 1 03:23:27 PDT 2026


https://github.com/amitamd7 updated https://github.com/llvm/llvm-project/pull/183261

>From 8b7585c834ebdfa40c200b3f224fbd78fdb56ed3 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Wed, 25 Feb 2026 03:48:23 -0500
Subject: [PATCH 01/32] Split node creation and registration

---
 clang/include/clang/AST/StmtOpenMP.h          | 74 +++++++++++++++++++
 clang/include/clang/Basic/StmtNodes.td        |  1 +
 .../include/clang/Serialization/ASTBitCodes.h |  1 +
 clang/lib/AST/StmtOpenMP.cpp                  | 22 ++++++
 llvm/include/llvm/Frontend/OpenMP/OMP.td      |  5 ++
 5 files changed, 103 insertions(+)

diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index bc6aeaa8d143c..626c39a7b778c 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -6065,6 +6065,80 @@ class OMPFuseDirective final
   }
 };
 
+/// Represents the '#pragma omp split' loop transformation directive.
+///
+/// \code{c}
+///   #pragma omp split
+///   for (int i = 0; i < n; ++i)
+///     ...
+/// \endcode
+///
+/// This directive transforms a single loop into multiple loops based on
+/// index ranges. The transformation splits the iteration space of the loop
+/// into multiple contiguous ranges.
+class OMPSplitDirective final
+    : public OMPCanonicalLoopNestTransformationDirective {
+  friend class ASTStmtReader;
+  friend class OMPExecutableDirective;
+
+  /// Offsets of child members.
+  enum {
+    PreInitsOffset = 0,
+    TransformedStmtOffset,
+  };
+
+  explicit OMPSplitDirective(SourceLocation StartLoc, SourceLocation EndLoc,
+                            unsigned NumLoops)
+      : OMPCanonicalLoopNestTransformationDirective(
+            OMPSplitDirectiveClass, llvm::omp::OMPD_split, StartLoc, EndLoc,
+            NumLoops) {}
+
+  void setPreInits(Stmt *PreInits) {
+    Data->getChildren()[PreInitsOffset] = PreInits;
+  }
+
+  void setTransformedStmt(Stmt *S) {
+    Data->getChildren()[TransformedStmtOffset] = S;
+  }
+
+public:
+  /// Create a new AST node representation for '#pragma omp split'.
+  ///
+  /// \param C         Context of the AST.
+  /// \param StartLoc  Location of the introducer (e.g. the 'omp' token).
+  /// \param EndLoc    Location of the directive's end (e.g. the tok::eod).
+  /// \param NumLoops  Number of affected loops (should be 1 for split).
+  /// \param AssociatedStmt  The outermost associated loop.
+  /// \param TransformedStmt The loop nest after splitting, or nullptr in
+  ///                        dependent contexts.
+  /// \param PreInits   Helper preinits statements for the loop nest.
+  static OMPSplitDirective *Create(const ASTContext &C,
+                                   SourceLocation StartLoc,
+                                   SourceLocation EndLoc,
+                                   Stmt *AssociatedStmt, unsigned NumLoops,
+                                   Stmt *TransformedStmt, Stmt *PreInits);
+
+  /// Build an empty '#pragma omp split' AST node for deserialization.
+  ///
+  /// \param C          Context of the AST.
+  /// \param NumLoops   Number of associated loops to allocate
+  static OMPSplitDirective *CreateEmpty(const ASTContext &C,
+                                        unsigned NumLoops);
+
+  /// Gets/sets the associated loops after the transformation, i.e. after
+  /// de-sugaring.
+  Stmt *getTransformedStmt() const {
+    return Data->getChildren()[TransformedStmtOffset];
+  }
+
+  /// Return preinits statement.
+  Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == OMPSplitDirectiveClass;
+  }
+};
+
 /// This represents '#pragma omp scan' directive.
 ///
 /// \code
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index 61d76bafdfcde..e166894ea024b 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -244,6 +244,7 @@ def OMPTileDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
 def OMPStripeDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
 def OMPUnrollDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
 def OMPReverseDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
+def OMPSplitDirective : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
 def OMPInterchangeDirective
     : StmtNode<OMPCanonicalLoopNestTransformationDirective>;
 def OMPCanonicalLoopSequenceTransformationDirective
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 783cd82895a90..9b798ed484454 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1965,6 +1965,7 @@ enum StmtCode {
   STMP_OMP_STRIPE_DIRECTIVE,
   STMT_OMP_UNROLL_DIRECTIVE,
   STMT_OMP_REVERSE_DIRECTIVE,
+  STMT_OMP_SPLIT_DIRECTIVE,
   STMT_OMP_INTERCHANGE_DIRECTIVE,
   STMT_OMP_FUSE_DIRECTIVE,
   STMT_OMP_FOR_DIRECTIVE,
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index a5b0cd3786a28..ada4e66b280f8 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -552,6 +552,28 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
       SourceLocation(), SourceLocation(), NumLoops);
 }
 
+OMPSplitDirective *OMPSplitDirective::Create(const ASTContext &C,
+                                             SourceLocation StartLoc,
+                                             SourceLocation EndLoc,
+                                             Stmt *AssociatedStmt,
+                                             unsigned NumLoops,
+                                             Stmt *TransformedStmt,
+                                             Stmt *PreInits) {
+  OMPSplitDirective *Dir = createDirective<OMPSplitDirective>(
+      C, {}, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc,
+      NumLoops);
+  Dir->setTransformedStmt(TransformedStmt);
+  Dir->setPreInits(PreInits);
+  return Dir;
+}
+
+OMPSplitDirective *OMPSplitDirective::CreateEmpty(const ASTContext &C,
+                                                   unsigned NumLoops) {
+  return createEmptyDirective<OMPSplitDirective>(
+      C, /*NumClauses=*/0, /*HasAssociatedStmt=*/true,
+      TransformedStmtOffset + 1, SourceLocation(), SourceLocation(), NumLoops);
+}
+
 OMPFuseDirective *OMPFuseDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
     ArrayRef<OMPClause *> Clauses, unsigned NumGeneratedTopLevelLoops,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index d1dddf76152ec..214366c3be59b 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -1435,6 +1435,11 @@ def OMP_Stripe : Directive<[Spelling<"stripe">]> {
   let association = AS_LoopNest;
   let category = CA_Executable;
 }
+def OMP_Split : Directive<[Spelling<"split">]> {
+  // TODO: Add counts clause support (OMPC_Counts)
+  let association = AS_LoopNest;
+  let category = CA_Executable;
+}
 def OMP_Unknown : Directive<[Spelling<"unknown">]> {
   let isDefault = true;
   let association = AS_None;

>From 9ac0f31b3fe3a001847adcc8f69c10afc22d5778 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Wed, 25 Feb 2026 06:51:02 -0500
Subject: [PATCH 02/32] wip

---
 clang/include/clang/AST/RecursiveASTVisitor.h |  3 +++
 clang/include/clang/AST/StmtOpenMP.h          | 14 ++++++--------
 clang/lib/AST/StmtOpenMP.cpp                  | 14 ++++++--------
 3 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index ce6ad723191e0..0802871c4a720 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3202,6 +3202,9 @@ DEF_TRAVERSE_STMT(OMPFuseDirective,
 DEF_TRAVERSE_STMT(OMPInterchangeDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
+DEF_TRAVERSE_STMT(OMPSplitDirective,
+                  { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
 DEF_TRAVERSE_STMT(OMPForDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 626c39a7b778c..c5b83e17acbcd 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -6088,7 +6088,7 @@ class OMPSplitDirective final
   };
 
   explicit OMPSplitDirective(SourceLocation StartLoc, SourceLocation EndLoc,
-                            unsigned NumLoops)
+                             unsigned NumLoops)
       : OMPCanonicalLoopNestTransformationDirective(
             OMPSplitDirectiveClass, llvm::omp::OMPD_split, StartLoc, EndLoc,
             NumLoops) {}
@@ -6112,18 +6112,16 @@ class OMPSplitDirective final
   /// \param TransformedStmt The loop nest after splitting, or nullptr in
   ///                        dependent contexts.
   /// \param PreInits   Helper preinits statements for the loop nest.
-  static OMPSplitDirective *Create(const ASTContext &C,
-                                   SourceLocation StartLoc,
-                                   SourceLocation EndLoc,
-                                   Stmt *AssociatedStmt, unsigned NumLoops,
-                                   Stmt *TransformedStmt, Stmt *PreInits);
+  static OMPSplitDirective *Create(const ASTContext &C, SourceLocation StartLoc,
+                                   SourceLocation EndLoc, Stmt *AssociatedStmt,
+                                   unsigned NumLoops, Stmt *TransformedStmt,
+                                   Stmt *PreInits);
 
   /// Build an empty '#pragma omp split' AST node for deserialization.
   ///
   /// \param C          Context of the AST.
   /// \param NumLoops   Number of associated loops to allocate
-  static OMPSplitDirective *CreateEmpty(const ASTContext &C,
-                                        unsigned NumLoops);
+  static OMPSplitDirective *CreateEmpty(const ASTContext &C, unsigned NumLoops);
 
   /// Gets/sets the associated loops after the transformation, i.e. after
   /// de-sugaring.
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index ada4e66b280f8..6c939cf7f9aeb 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -552,13 +552,11 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
       SourceLocation(), SourceLocation(), NumLoops);
 }
 
-OMPSplitDirective *OMPSplitDirective::Create(const ASTContext &C,
-                                             SourceLocation StartLoc,
-                                             SourceLocation EndLoc,
-                                             Stmt *AssociatedStmt,
-                                             unsigned NumLoops,
-                                             Stmt *TransformedStmt,
-                                             Stmt *PreInits) {
+OMPSplitDirective *
+OMPSplitDirective::Create(const ASTContext &C, SourceLocation StartLoc,
+                          SourceLocation EndLoc, Stmt *AssociatedStmt,
+                          unsigned NumLoops, Stmt *TransformedStmt,
+                          Stmt *PreInits) {
   OMPSplitDirective *Dir = createDirective<OMPSplitDirective>(
       C, {}, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc,
       NumLoops);
@@ -568,7 +566,7 @@ OMPSplitDirective *OMPSplitDirective::Create(const ASTContext &C,
 }
 
 OMPSplitDirective *OMPSplitDirective::CreateEmpty(const ASTContext &C,
-                                                   unsigned NumLoops) {
+                                                  unsigned NumLoops) {
   return createEmptyDirective<OMPSplitDirective>(
       C, /*NumClauses=*/0, /*HasAssociatedStmt=*/true,
       TransformedStmtOffset + 1, SourceLocation(), SourceLocation(), NumLoops);

>From 041dc03d09b44658491db9b739f419f8db7237a6 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Fri, 6 Mar 2026 06:29:06 -0500
Subject: [PATCH 03/32] wip

---
 clang/lib/AST/StmtPrinter.cpp             |  5 +++++
 clang/lib/AST/StmtProfile.cpp             |  4 ++++
 clang/lib/Sema/TreeTransform.h            | 11 +++++++++++
 clang/lib/Serialization/ASTReaderStmt.cpp |  4 ++++
 clang/lib/Serialization/ASTWriterStmt.cpp |  5 +++++
 5 files changed, 29 insertions(+)

diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 4d364fdcd5502..e0b930ba0a21a 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -800,6 +800,11 @@ void StmtPrinter::VisitOMPInterchangeDirective(OMPInterchangeDirective *Node) {
   PrintOMPExecutableDirective(Node);
 }
 
+void StmtPrinter::VisitOMPSplitDirective(OMPSplitDirective *Node) {
+  Indent() << "#pragma omp split";
+  PrintOMPExecutableDirective(Node);
+}
+
 void StmtPrinter::VisitOMPFuseDirective(OMPFuseDirective *Node) {
   Indent() << "#pragma omp fuse";
   PrintOMPExecutableDirective(Node);
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index e8c1f8a8ecb5f..6f929c060ebe7 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -1051,6 +1051,10 @@ void StmtProfiler::VisitOMPInterchangeDirective(
   VisitOMPCanonicalLoopNestTransformationDirective(S);
 }
 
+void StmtProfiler::VisitOMPSplitDirective(const OMPSplitDirective *S) {
+  VisitOMPCanonicalLoopNestTransformationDirective(S);
+}
+
 void StmtProfiler::VisitOMPCanonicalLoopSequenceTransformationDirective(
     const OMPCanonicalLoopSequenceTransformationDirective *S) {
   VisitOMPExecutableDirective(S);
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 8ae5df367e0dd..b22c500048854 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -9759,6 +9759,17 @@ StmtResult TreeTransform<Derived>::TransformOMPInterchangeDirective(
   return Res;
 }
 
+template <typename Derived>
+StmtResult
+TreeTransform<Derived>::TransformOMPSplitDirective(OMPSplitDirective *D) {
+  DeclarationNameInfo DirName;
+  getDerived().getSema().OpenMP().StartOpenMPDSABlock(
+      D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc());
+  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+  getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get());
+  return Res;
+}
+
 template <typename Derived>
 StmtResult
 TreeTransform<Derived>::TransformOMPFuseDirective(OMPFuseDirective *D) {
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 801eed43c2440..cb7aa7a1b7cf1 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2529,6 +2529,10 @@ void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) {
   VisitOMPCanonicalLoopNestTransformationDirective(D);
 }
 
+void ASTStmtReader::VisitOMPSplitDirective(OMPSplitDirective *D) {
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
+}
+
 void ASTStmtReader::VisitOMPFuseDirective(OMPFuseDirective *D) {
   VisitOMPCanonicalLoopSequenceTransformationDirective(D);
 }
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 934a95df1be7e..4612cd2a7944d 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2546,6 +2546,11 @@ void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) {
   Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE;
 }
 
+void ASTStmtWriter::VisitOMPSplitDirective(OMPSplitDirective *D) {
+  VisitOMPCanonicalLoopNestTransformationDirective(D);
+  Code = serialization::STMT_OMP_SPLIT_DIRECTIVE;
+}
+
 void ASTStmtWriter::VisitOMPCanonicalLoopSequenceTransformationDirective(
     OMPCanonicalLoopSequenceTransformationDirective *D) {
   VisitStmt(D);

>From c80ccfcaa1746476809690a346e6659d2e7e0c69 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Mon, 9 Mar 2026 07:42:22 -0400
Subject: [PATCH 04/32] wip

---
 clang/lib/CodeGen/CGStmt.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index a75d3dc64c6b4..7b6035a6968b1 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -230,6 +230,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
   case Stmt::OMPReverseDirectiveClass:
     EmitOMPReverseDirective(cast<OMPReverseDirective>(*S));
     break;
+  case Stmt::OMPSplitDirectiveClass:
+    EmitOMPSplitDirective(cast<OMPSplitDirective>(*S));
+    break;
   case Stmt::OMPInterchangeDirectiveClass:
     EmitOMPInterchangeDirective(cast<OMPInterchangeDirective>(*S));
     break;

>From 5f7166b64ce081e6deef7a4012686bf491c67c55 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Mon, 9 Mar 2026 08:16:47 -0400
Subject: [PATCH 05/32] wip

---
 clang/lib/Sema/SemaExceptionSpec.cpp         | 1 +
 clang/lib/Serialization/ASTReaderStmt.cpp    | 8 ++++++++
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 1 +
 clang/tools/libclang/CXCursor.cpp            | 3 +++
 4 files changed, 13 insertions(+)

diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index 56079ea8e1bf8..40d530a1f3925 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1508,6 +1508,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
   case Stmt::OMPUnrollDirectiveClass:
   case Stmt::OMPReverseDirectiveClass:
   case Stmt::OMPInterchangeDirectiveClass:
+  case Stmt::OMPSplitDirectiveClass:
   case Stmt::OMPFuseDirectiveClass:
   case Stmt::OMPSingleDirectiveClass:
   case Stmt::OMPTargetDataDirectiveClass:
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index cb7aa7a1b7cf1..79a6a03b3f276 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -3691,6 +3691,14 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       break;
     }
 
+    case STMT_OMP_SPLIT_DIRECTIVE: {
+      unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
+      assert(Record[ASTStmtReader::NumStmtFields + 1] == 0 &&
+             "Split directive has no clauses");
+      S = OMPSplitDirective::CreateEmpty(Context, NumLoops);
+      break;
+    }
+
     case STMT_OMP_FUSE_DIRECTIVE: {
       unsigned NumClauses = Record[ASTStmtReader::NumStmtFields];
       S = OMPFuseDirective::CreateEmpty(Context, NumClauses);
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index e9522a7975515..b6d2c96627520 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1812,6 +1812,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
     case Stmt::OMPStripeDirectiveClass:
     case Stmt::OMPTileDirectiveClass:
     case Stmt::OMPInterchangeDirectiveClass:
+    case Stmt::OMPSplitDirectiveClass:
     case Stmt::OMPFuseDirectiveClass:
     case Stmt::OMPInteropDirectiveClass:
     case Stmt::OMPDispatchDirectiveClass:
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index d31d2c0c9bb67..dd815147c48dc 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -697,6 +697,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
   case Stmt::OMPReverseDirectiveClass:
     K = CXCursor_OMPReverseDirective;
     break;
+  case Stmt::OMPSplitDirectiveClass:
+    K = CXCursor_UnexposedStmt;
+    break;
   case Stmt::OMPInterchangeDirectiveClass:
     K = CXCursor_OMPInterchangeDirective;
     break;

>From 7e221114e7e5d66d7d05008b42823198e9d53460 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Mon, 9 Mar 2026 12:13:02 -0400
Subject: [PATCH 06/32] wip

---
 clang/lib/CodeGen/CGStmt.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 7b6035a6968b1..ccd70451ae882 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -231,7 +231,8 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
     EmitOMPReverseDirective(cast<OMPReverseDirective>(*S));
     break;
   case Stmt::OMPSplitDirectiveClass:
-    EmitOMPSplitDirective(cast<OMPSplitDirective>(*S));
+    llvm_unreachable(
+        "OMPSplitDirective handled by EmitSimpleOMPExecutableDirective");
     break;
   case Stmt::OMPInterchangeDirectiveClass:
     EmitOMPInterchangeDirective(cast<OMPInterchangeDirective>(*S));

>From 00a9418fc4234e547230b8c3b9c01726c3ced726 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Fri, 13 Mar 2026 09:48:29 -0400
Subject: [PATCH 07/32] sema

---
 clang/include/clang-c/Index.h         |   4 +
 clang/include/clang/Sema/SemaOpenMP.h |   4 +
 clang/lib/Basic/OpenMPKinds.cpp       |   3 +-
 clang/lib/CodeGen/CGStmt.cpp          |   3 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp    |   8 +
 clang/lib/CodeGen/CodeGenFunction.h   |   1 +
 clang/lib/Parse/ParseOpenMP.cpp       |   4 +
 clang/lib/Sema/SemaOpenMP.cpp         | 205 ++++++++++++++++++++++++++
 clang/test/OpenMP/split_ast_print.cpp |  28 ++++
 clang/test/OpenMP/split_simple_test.c |  26 ++++
 clang/tools/libclang/CIndex.cpp       |   2 +
 clang/tools/libclang/CXCursor.cpp     |   2 +-
 12 files changed, 286 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/OpenMP/split_ast_print.cpp
 create mode 100644 clang/test/OpenMP/split_simple_test.c

diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
index dcf1f4f1b4258..119bd68ff9814 100644
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -2166,6 +2166,10 @@ enum CXCursorKind {
    */
   CXCursor_OMPFuseDirective = 311,
 
+  /** OpenMP split directive.
+   */
+  CXCursor_OMPSplitDirective = 312,
+
   /** OpenACC Compute Construct.
    */
   CXCursor_OpenACCComputeConstruct = 320,
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index 7853f29f98c25..57382557fd13f 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -457,6 +457,10 @@ class SemaOpenMP : public SemaBase {
   /// Called on well-formed '#pragma omp reverse'.
   StmtResult ActOnOpenMPReverseDirective(Stmt *AStmt, SourceLocation StartLoc,
                                          SourceLocation EndLoc);
+  /// Called on well-formed '#pragma omp split' after parsing of its
+  /// associated statement.
+  StmtResult ActOnOpenMPSplitDirective(Stmt *AStmt, SourceLocation StartLoc,
+                                       SourceLocation EndLoc);
   /// Called on well-formed '#pragma omp interchange' after parsing of its
   /// clauses and the associated statement.
   StmtResult ActOnOpenMPInterchangeDirective(ArrayRef<OMPClause *> Clauses,
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index 2c693b1958ee7..ef01943f11ca5 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -815,7 +815,8 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
 bool clang::isOpenMPCanonicalLoopNestTransformationDirective(
     OpenMPDirectiveKind DKind) {
   return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse ||
-         DKind == OMPD_interchange || DKind == OMPD_stripe;
+         DKind == OMPD_split || DKind == OMPD_interchange ||
+         DKind == OMPD_stripe;
 }
 
 bool clang::isOpenMPCanonicalLoopSequenceTransformationDirective(
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index ccd70451ae882..7b6035a6968b1 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -231,8 +231,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
     EmitOMPReverseDirective(cast<OMPReverseDirective>(*S));
     break;
   case Stmt::OMPSplitDirectiveClass:
-    llvm_unreachable(
-        "OMPSplitDirective handled by EmitSimpleOMPExecutableDirective");
+    EmitOMPSplitDirective(cast<OMPSplitDirective>(*S));
     break;
   case Stmt::OMPInterchangeDirectiveClass:
     EmitOMPInterchangeDirective(cast<OMPInterchangeDirective>(*S));
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 990ec47488465..2acced1f0da1a 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -197,6 +197,8 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
       PreInits = Unroll->getPreInits();
     } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(&S)) {
       PreInits = Reverse->getPreInits();
+    } else if (const auto *Split = dyn_cast<OMPSplitDirective>(&S)) {
+      PreInits = Split->getPreInits();
     } else if (const auto *Interchange =
                    dyn_cast<OMPInterchangeDirective>(&S)) {
       PreInits = Interchange->getPreInits();
@@ -3203,6 +3205,12 @@ void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) {
   EmitStmt(S.getTransformedStmt());
 }
 
+void CodeGenFunction::EmitOMPSplitDirective(const OMPSplitDirective &S) {
+  // Emit the de-sugared statement (the two split loops).
+  OMPTransformDirectiveScopeRAII SplitScope(*this, &S);
+  EmitStmt(S.getTransformedStmt());
+}
+
 void CodeGenFunction::EmitOMPInterchangeDirective(
     const OMPInterchangeDirective &S) {
   // Emit the de-sugared statement.
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index fd474c09044ef..f06c216e0c746 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3930,6 +3930,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   void EmitOMPStripeDirective(const OMPStripeDirective &S);
   void EmitOMPUnrollDirective(const OMPUnrollDirective &S);
   void EmitOMPReverseDirective(const OMPReverseDirective &S);
+  void EmitOMPSplitDirective(const OMPSplitDirective &S);
   void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S);
   void EmitOMPFuseDirective(const OMPFuseDirective &S);
   void EmitOMPForDirective(const OMPForDirective &S);
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 29397d67b5bcc..a0783a3737ad8 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -3432,6 +3432,10 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
     }
     Clause = ParseOpenMPPermutationClause();
     break;
+  case OMPC_counts:
+    // TODO: Implement ParseOpenMPCountsClause() - not yet worked on
+    SkipUntil(tok::r_paren, tok::comma, StopBeforeMatch);
+    break;
   case OMPC_uses_allocators:
     Clause = ParseOpenMPUsesAllocatorClause(DKind);
     break;
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 0d3c7fc4907a2..9358316132ca4 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4627,6 +4627,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind,
   case OMPD_stripe:
   case OMPD_unroll:
   case OMPD_reverse:
+  case OMPD_split:
   case OMPD_interchange:
   case OMPD_fuse:
   case OMPD_assume:
@@ -6466,6 +6467,13 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
            "reverse directive does not support any clauses");
     Res = ActOnOpenMPReverseDirective(AStmt, StartLoc, EndLoc);
     break;
+  case OMPD_split:
+    // TODO: Add counts clause support - not yet worked on
+    // Currently only supports basic split without clauses.
+    assert(ClausesWithImplicit.empty() &&
+           "split directive does not support any clauses");
+    Res = ActOnOpenMPSplitDirective(AStmt, StartLoc, EndLoc);
+    break;
   case OMPD_interchange:
     Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc,
                                           EndLoc);
@@ -15907,6 +15915,203 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt,
                                      buildPreInits(Context, PreInits));
 }
 
+StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(Stmt *AStmt,
+                                                 SourceLocation StartLoc,
+                                                 SourceLocation EndLoc) {
+  ASTContext &Context = getASTContext();
+  Scope *CurScope = SemaRef.getCurScope();
+
+  // Empty statement should only be possible if there already was an error.
+  if (!AStmt)
+    return StmtError();
+
+  constexpr unsigned NumLoops = 1;
+  Stmt *Body = nullptr;
+  SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers(
+      NumLoops);
+  SmallVector<SmallVector<Stmt *>, NumLoops + 1> OriginalInits;
+  if (!checkTransformableLoopNest(OMPD_split, AStmt, NumLoops, LoopHelpers,
+                                  Body, OriginalInits))
+    return StmtError();
+
+  // Delay applying the transformation to when template is completely
+  // instantiated.
+  if (SemaRef.CurContext->isDependentContext())
+    return OMPSplitDirective::Create(Context, StartLoc, EndLoc, AStmt, NumLoops,
+                                     nullptr, nullptr);
+
+  assert(LoopHelpers.size() == NumLoops &&
+         "Expecting a single-dimensional loop iteration space");
+  assert(OriginalInits.size() == NumLoops &&
+         "Expecting a single-dimensional loop iteration space");
+  OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers.front();
+
+  // Find the loop statement.
+  Stmt *LoopStmt = nullptr;
+  collectLoopStmts(AStmt, {LoopStmt});
+
+  // Determine the PreInit declarations.
+  SmallVector<Stmt *> PreInits;
+  addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits);
+
+  auto *IterationVarRef = cast<DeclRefExpr>(LoopHelper.IterationVarRef);
+  QualType IVTy = IterationVarRef->getType();
+  uint64_t IVWidth = Context.getTypeSize(IVTy);
+  auto *OrigVar = cast<DeclRefExpr>(LoopHelper.Counters.front());
+
+  // Iteration variable SourceLocations.
+  SourceLocation OrigVarLoc = OrigVar->getExprLoc();
+  SourceLocation OrigVarLocBegin = OrigVar->getBeginLoc();
+  SourceLocation OrigVarLocEnd = OrigVar->getEndLoc();
+
+  // Locations pointing to the transformation.
+  SourceLocation TransformLoc = StartLoc;
+
+  // Internal variable names.
+  std::string OrigVarName = OrigVar->getNameInfo().getAsString();
+
+  // For Subexpressions with more than one use, we define a lambda
+  // that creates a new AST node at every use.
+  CaptureVars CopyTransformer(SemaRef);
+  auto MakeNumIterations = [&CopyTransformer, &LoopHelper]() -> Expr * {
+    return AssertSuccess(
+        CopyTransformer.TransformExpr(LoopHelper.NumIterations));
+  };
+
+  // For split, we currently divide the loop into two equal parts.
+  // First loop: i = 0; i < n/2; ++i
+  // Second loop: i = n/2; i < n; ++i
+  // TODO: Add counts clause support - not yet worked on
+
+  // Create iteration variable for the first split loop.
+  SmallString<64> FirstIVName(".split.first.iv.");
+  FirstIVName += OrigVarName;
+  VarDecl *FirstIVDecl =
+      buildVarDecl(SemaRef, {}, IVTy, FirstIVName, nullptr, OrigVar);
+  auto MakeFirstRef = [&SemaRef = this->SemaRef, FirstIVDecl, IVTy,
+                       OrigVarLoc]() {
+    return buildDeclRefExpr(SemaRef, FirstIVDecl, IVTy, OrigVarLoc);
+  };
+
+  // Create iteration variable for the second split loop.
+  SmallString<64> SecondIVName(".split.second.iv.");
+  SecondIVName += OrigVarName;
+  VarDecl *SecondIVDecl =
+      buildVarDecl(SemaRef, {}, IVTy, SecondIVName, nullptr, OrigVar);
+  auto MakeSecondRef = [&SemaRef = this->SemaRef, SecondIVDecl, IVTy,
+                        OrigVarLoc]() {
+    return buildDeclRefExpr(SemaRef, SecondIVDecl, IVTy, OrigVarLoc);
+  };
+
+  // Create n/2 expression for the split point.
+  auto *Two = IntegerLiteral::Create(Context, llvm::APInt(IVWidth, 2), IVTy,
+                                     TransformLoc);
+  ExprResult HalfIterations = SemaRef.BuildBinOp(CurScope, TransformLoc, BO_Div,
+                                                 MakeNumIterations(), Two);
+  if (!HalfIterations.isUsable())
+    return StmtError();
+
+  // First loop: init-statement: i = 0
+  auto *Zero = IntegerLiteral::Create(Context, llvm::APInt::getZero(IVWidth),
+                                      FirstIVDecl->getType(), OrigVarLoc);
+  SemaRef.AddInitializerToDecl(FirstIVDecl, Zero, /*DirectInit=*/false);
+  StmtResult FirstInit = new (Context)
+      DeclStmt(DeclGroupRef(FirstIVDecl), OrigVarLocBegin, OrigVarLocEnd);
+  if (!FirstInit.isUsable())
+    return StmtError();
+
+  // First loop: cond-expression (i < n/2)
+  ExprResult FirstCond =
+      SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT,
+                         MakeFirstRef(), HalfIterations.get());
+  if (!FirstCond.isUsable())
+    return StmtError();
+
+  // First loop: incr-statement (++i)
+  ExprResult FirstIncr = SemaRef.BuildUnaryOp(
+      CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, MakeFirstRef());
+  if (!FirstIncr.isUsable())
+    return StmtError();
+
+  // First loop: body - update original variable and execute body
+  // We need to create a copy of LoopHelper.Updates that uses FirstIV instead
+  // of the iteration variable. For now, use a simpler approach: directly
+  // assign the first IV to the original variable.
+  SmallVector<Stmt *, 4> FirstBodyStmts;
+  // Create update statement: origVar = .split.first.iv
+  // We'll use a BinaryOperator for assignment
+  ExprResult FirstUpdateExpr = SemaRef.BuildBinOp(
+      CurScope, OrigVarLoc, BO_Assign, OrigVar, MakeFirstRef());
+  if (!FirstUpdateExpr.isUsable())
+    return StmtError();
+  FirstBodyStmts.push_back(FirstUpdateExpr.get());
+  if (auto *CXXRangeFor = dyn_cast<CXXForRangeStmt>(LoopStmt))
+    FirstBodyStmts.push_back(CXXRangeFor->getLoopVarStmt());
+  FirstBodyStmts.push_back(Body);
+  auto *FirstBody =
+      CompoundStmt::Create(Context, FirstBodyStmts, FPOptionsOverride(),
+                           Body->getBeginLoc(), Body->getEndLoc());
+
+  // Create first loop
+  auto *FirstLoop = new (Context)
+      ForStmt(Context, FirstInit.get(), FirstCond.get(), nullptr,
+              FirstIncr.get(), FirstBody, LoopHelper.Init->getBeginLoc(),
+              LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc());
+
+  // Second loop: init-statement (i = n/2)
+  SemaRef.AddInitializerToDecl(SecondIVDecl, HalfIterations.get(),
+                               /*DirectInit=*/false);
+  StmtResult SecondInit = new (Context)
+      DeclStmt(DeclGroupRef(SecondIVDecl), OrigVarLocBegin, OrigVarLocEnd);
+  if (!SecondInit.isUsable())
+    return StmtError();
+
+  // Second loop: cond-expression (i < n)
+  ExprResult SecondCond =
+      SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT,
+                         MakeSecondRef(), MakeNumIterations());
+  if (!SecondCond.isUsable())
+    return StmtError();
+
+  // Second loop: incr-statement (++i)
+  ExprResult SecondIncr = SemaRef.BuildUnaryOp(
+      CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, MakeSecondRef());
+  if (!SecondIncr.isUsable())
+    return StmtError();
+
+  // Second loop: body - update original variable and execute body
+  SmallVector<Stmt *, 4> SecondBodyStmts;
+  // Create update statement: origVar = .split.second.iv
+  ExprResult SecondUpdateExpr = SemaRef.BuildBinOp(
+      CurScope, OrigVarLoc, BO_Assign, OrigVar, MakeSecondRef());
+  if (!SecondUpdateExpr.isUsable())
+    return StmtError();
+  SecondBodyStmts.push_back(SecondUpdateExpr.get());
+  if (auto *CXXRangeFor = dyn_cast<CXXForRangeStmt>(LoopStmt))
+    SecondBodyStmts.push_back(CXXRangeFor->getLoopVarStmt());
+  SecondBodyStmts.push_back(Body);
+  auto *SecondBody =
+      CompoundStmt::Create(Context, SecondBodyStmts, FPOptionsOverride(),
+                           Body->getBeginLoc(), Body->getEndLoc());
+
+  // Create second loop
+  auto *SecondLoop = new (Context)
+      ForStmt(Context, SecondInit.get(), SecondCond.get(), nullptr,
+              SecondIncr.get(), SecondBody, LoopHelper.Init->getBeginLoc(),
+              LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc());
+
+  // Combine both loops into a compound statement
+  SmallVector<Stmt *, 2> SplitLoops;
+  SplitLoops.push_back(FirstLoop);
+  SplitLoops.push_back(SecondLoop);
+  auto *SplitStmt =
+      CompoundStmt::Create(Context, SplitLoops, FPOptionsOverride(),
+                           FirstLoop->getBeginLoc(), SecondLoop->getEndLoc());
+
+  return OMPSplitDirective::Create(Context, StartLoc, EndLoc, AStmt, NumLoops,
+                                   SplitStmt, buildPreInits(Context, PreInits));
+}
+
 StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective(
     ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
     SourceLocation EndLoc) {
diff --git a/clang/test/OpenMP/split_ast_print.cpp b/clang/test/OpenMP/split_ast_print.cpp
new file mode 100644
index 0000000000000..b24eae4a9bead
--- /dev/null
+++ b/clang/test/OpenMP/split_ast_print.cpp
@@ -0,0 +1,28 @@
+// Check no warnings/errors and that split is recognized
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+// Check AST: OMPSplitDirective with associated for-loop
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP
+
+// Check unparsing
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT
+
+#ifndef HEADER
+#define HEADER
+
+extern "C" void body(...);
+
+// PRINT-LABEL: void foo(
+// DUMP-LABEL:  FunctionDecl {{.*}} foo
+void foo(int n) {
+  // PRINT:     #pragma omp split
+  // DUMP:      OMPSplitDirective
+  #pragma omp split
+  // PRINT: for (int i = 0; i < n; ++i)
+  // DUMP:      ForStmt
+  for (int i = 0; i < n; ++i)
+    body(i);
+}
+
+#endif
diff --git a/clang/test/OpenMP/split_simple_test.c b/clang/test/OpenMP/split_simple_test.c
new file mode 100644
index 0000000000000..bc0d3c4770890
--- /dev/null
+++ b/clang/test/OpenMP/split_simple_test.c
@@ -0,0 +1,26 @@
+/*
+ * Simple test for #pragma omp split: one canonical for-loop is transformed
+ * into two loops (first half and second half of iterations). This file
+ * verifies compilation and correct result at runtime.
+ *
+ * Compile: clang -fopenmp -fopenmp-version=60 -o split_simple_test split_simple_test.c
+ * Run:     ./split_simple_test
+ * Expected: prints "sum 0..9 = 45 (expected 45)", exit code 0.
+ */
+// Verify the split directive compiles and links.
+// RUN: %clang -fopenmp -fopenmp-version=60 -o %t %s
+
+#include <stdio.h>
+
+int main(void) {
+  const int n = 10;
+  int sum = 0;
+
+#pragma omp split
+  for (int i = 0; i < n; ++i) {
+    sum += i;
+  }
+
+  printf("sum 0..%d = %d (expected %d)\n", n - 1, sum, n * (n - 1) / 2);
+  return (sum == n * (n - 1) / 2) ? 0 : 1;
+}
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 3ee37ed2dfc27..9016bc6e6e418 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -6326,6 +6326,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) {
     return cxstring::createRef("OMPInterchangeDirective");
   case CXCursor_OMPFuseDirective:
     return cxstring::createRef("OMPFuseDirective");
+  case CXCursor_OMPSplitDirective:
+    return cxstring::createRef("OMPSplitDirective");
   case CXCursor_OMPForDirective:
     return cxstring::createRef("OMPForDirective");
   case CXCursor_OMPForSimdDirective:
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index dd815147c48dc..242380c68c667 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -698,7 +698,7 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
     K = CXCursor_OMPReverseDirective;
     break;
   case Stmt::OMPSplitDirectiveClass:
-    K = CXCursor_UnexposedStmt;
+    K = CXCursor_OMPSplitDirective;
     break;
   case Stmt::OMPInterchangeDirectiveClass:
     K = CXCursor_OMPInterchangeDirective;

>From ded677bb341a36acfba3281b18ef65cbd2ff4567 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Fri, 13 Mar 2026 10:21:40 -0400
Subject: [PATCH 08/32] duplicate_split_def_removed

---
 llvm/include/llvm/Frontend/OpenMP/OMP.td | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 214366c3be59b..7e1a232d6bad7 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -1203,16 +1203,6 @@ def OMP_EndSingle : Directive<[Spelling<"end single">]> {
   let category = OMP_Single.category;
   let languages = [L_Fortran];
 }
-def OMP_Split : Directive<[Spelling<"split">]> {
-  let allowedClauses = [
-    VersionedClause<OMPC_Apply, 60>,
-  ];
-  let allowedOnceClauses = [
-    VersionedClause<OMPC_Counts, 60>,
-  ];
-  let association = AS_LoopNest;
-  let category = CA_Executable;
-}
 def OMP_Target : Directive<[Spelling<"target">]> {
   let allowedClauses = [
     VersionedClause<OMPC_Allocate>,

>From a8d880911b54f91612115c78975d411babffb858 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Fri, 13 Mar 2026 12:29:17 -0400
Subject: [PATCH 09/32] cursorkind_enum_added

---
 clang/bindings/python/clang/cindex.py |  3 +++
 clang/test/OpenMP/split_simple_test.c | 20 +++++++++-----------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py
index b71f9ed2275e0..a90d48cf6d481 100644
--- a/clang/bindings/python/clang/cindex.py
+++ b/clang/bindings/python/clang/cindex.py
@@ -1453,6 +1453,9 @@ def is_unexposed(self):
     # OpenMP fuse directive.
     OMP_FUSE_DIRECTIVE = 311
 
+    # OpenMP split directive.
+    OMP_SPLIT_DIRECTIVE = 312
+
     # OpenACC Compute Construct.
     OPEN_ACC_COMPUTE_DIRECTIVE = 320
 
diff --git a/clang/test/OpenMP/split_simple_test.c b/clang/test/OpenMP/split_simple_test.c
index bc0d3c4770890..62dbc1cd861e5 100644
--- a/clang/test/OpenMP/split_simple_test.c
+++ b/clang/test/OpenMP/split_simple_test.c
@@ -1,16 +1,10 @@
 /*
  * Simple test for #pragma omp split: one canonical for-loop is transformed
- * into two loops (first half and second half of iterations). This file
- * verifies compilation and correct result at runtime.
- *
- * Compile: clang -fopenmp -fopenmp-version=60 -o split_simple_test split_simple_test.c
- * Run:     ./split_simple_test
- * Expected: prints "sum 0..9 = 45 (expected 45)", exit code 0.
+ * into two loops (first half and second half of iterations).
  */
-// Verify the split directive compiles and links.
-// RUN: %clang -fopenmp -fopenmp-version=60 -o %t %s
-
-#include <stdio.h>
+// Verify the split directive compiles and emits IR (two sequential loops).
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown \
+// RUN:   -emit-llvm %s -o - | FileCheck %s
 
 int main(void) {
   const int n = 10;
@@ -21,6 +15,10 @@ int main(void) {
     sum += i;
   }
 
-  printf("sum 0..%d = %d (expected %d)\n", n - 1, sum, n * (n - 1) / 2);
   return (sum == n * (n - 1) / 2) ? 0 : 1;
 }
+
+// CHECK: define
+// CHECK: load
+// Split produces two sequential loops; ensure we have loop structure in IR.
+// CHECK: br i1

>From 55821b134e7b6ab9efaec0a7c3a05acef29dcb4b Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Thu, 19 Mar 2026 04:32:37 -0400
Subject: [PATCH 10/32] counts_clause_defined

---
 clang/include/clang/AST/OpenMPClause.h   | 87 ++++++++++++++++++++++++
 clang/include/clang/AST/StmtOpenMP.h     | 15 ++--
 clang/lib/AST/OpenMPClause.cpp           | 31 +++++++++
 clang/lib/AST/StmtOpenMP.cpp             | 13 ++--
 clang/lib/Basic/OpenMPKinds.cpp          |  2 +
 llvm/include/llvm/Frontend/OpenMP/OMP.td |  8 ++-
 6 files changed, 144 insertions(+), 12 deletions(-)

diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index af5d3f4698eda..dbc22e23c3704 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -1023,6 +1023,93 @@ class OMPSizesClause final
   }
 };
 
+/// This represents the 'counts' clause in the '#pragma omp split' directive.
+///
+/// \code
+/// #pragma omp split counts(3, 5, 2)
+/// for (int i = 0; i < n; ++i) { ... }
+/// \endcode
+class OMPCountsClause final
+    : public OMPClause,
+      private llvm::TrailingObjects<OMPCountsClause, Expr *> {
+  friend class OMPClauseReader;
+  friend class llvm::TrailingObjects<OMPCountsClause, Expr *>;
+
+  /// Location of '('.
+  SourceLocation LParenLoc;
+
+  /// Number of count expressions in the clause.
+  unsigned NumCounts;
+
+  /// Build an empty clause.
+  explicit OMPCountsClause(int NumCounts)
+      : OMPClause(llvm::omp::OMPC_counts, SourceLocation(), SourceLocation()),
+        NumCounts(NumCounts) {}
+
+public:
+  /// Build a 'counts' AST node.
+  ///
+  /// \param C         Context of the AST.
+  /// \param StartLoc  Location of the 'counts' identifier.
+  /// \param LParenLoc Location of '('.
+  /// \param EndLoc    Location of ')'.
+  /// \param Counts    Content of the clause.
+  static OMPCountsClause *Create(const ASTContext &C, SourceLocation StartLoc,
+                                 SourceLocation LParenLoc,
+                                 SourceLocation EndLoc,
+                                 ArrayRef<Expr *> Counts);
+
+  /// Build an empty 'counts' AST node for deserialization.
+  ///
+  /// \param C          Context of the AST.
+  /// \param NumCounts   Number of items in the clause.
+  static OMPCountsClause *CreateEmpty(const ASTContext &C, unsigned NumCounts);
+
+  /// Sets the location of '('.
+  void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
+  /// Returns the location of '('.
+  SourceLocation getLParenLoc() const { return LParenLoc; }
+
+  /// Returns the number of list items.
+  unsigned getNumCounts() const { return NumCounts; }
+
+  /// Returns the count expressions.
+  MutableArrayRef<Expr *> getCountsRefs() {
+    return getTrailingObjects(NumCounts);
+  }
+  ArrayRef<Expr *> getCountsRefs() const {
+    return getTrailingObjects(NumCounts);
+  }
+
+  /// Sets the count expressions.
+  void setCountsRefs(ArrayRef<Expr *> VL) {
+    assert(VL.size() == NumCounts);
+    llvm::copy(VL, getCountsRefs().begin());
+  }
+
+  child_range children() {
+    MutableArrayRef<Expr *> Counts = getCountsRefs();
+    return child_range(reinterpret_cast<Stmt **>(Counts.begin()),
+                       reinterpret_cast<Stmt **>(Counts.end()));
+  }
+  const_child_range children() const {
+    ArrayRef<Expr *> Counts = getCountsRefs();
+    return const_child_range(reinterpret_cast<Stmt *const *>(Counts.begin()),
+                             reinterpret_cast<Stmt *const *>(Counts.end()));
+  }
+  child_range used_children() {
+    return child_range(child_iterator(), child_iterator());
+  }
+  const_child_range used_children() const {
+    return const_child_range(const_child_iterator(), const_child_iterator());
+  }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == llvm::omp::OMPC_counts;
+  }
+};
+
 /// This class represents the 'permutation' clause in the
 /// '#pragma omp interchange' directive.
 ///
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index c5b83e17acbcd..bdaf73c0a6607 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -6107,21 +6107,26 @@ class OMPSplitDirective final
   /// \param C         Context of the AST.
   /// \param StartLoc  Location of the introducer (e.g. the 'omp' token).
   /// \param EndLoc    Location of the directive's end (e.g. the tok::eod).
+  /// \param Clauses   The directive's clauses (e.g. the required \c counts
+  ///                  clause).
   /// \param NumLoops  Number of affected loops (should be 1 for split).
   /// \param AssociatedStmt  The outermost associated loop.
   /// \param TransformedStmt The loop nest after splitting, or nullptr in
   ///                        dependent contexts.
   /// \param PreInits   Helper preinits statements for the loop nest.
   static OMPSplitDirective *Create(const ASTContext &C, SourceLocation StartLoc,
-                                   SourceLocation EndLoc, Stmt *AssociatedStmt,
-                                   unsigned NumLoops, Stmt *TransformedStmt,
-                                   Stmt *PreInits);
+                                   SourceLocation EndLoc,
+                                   ArrayRef<OMPClause *> Clauses,
+                                   unsigned NumLoops, Stmt *AssociatedStmt,
+                                   Stmt *TransformedStmt, Stmt *PreInits);
 
   /// Build an empty '#pragma omp split' AST node for deserialization.
   ///
   /// \param C          Context of the AST.
-  /// \param NumLoops   Number of associated loops to allocate
-  static OMPSplitDirective *CreateEmpty(const ASTContext &C, unsigned NumLoops);
+  /// \param NumClauses Number of clauses to allocate.
+  /// \param NumLoops   Number of associated loops to allocate.
+  static OMPSplitDirective *CreateEmpty(const ASTContext &C,
+                                        unsigned NumClauses, unsigned NumLoops);
 
   /// Gets/sets the associated loops after the transformation, i.e. after
   /// de-sugaring.
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index d4826c3c6edca..ab6fa1c673411 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -986,6 +986,25 @@ OMPSizesClause *OMPSizesClause::CreateEmpty(const ASTContext &C,
   return new (Mem) OMPSizesClause(NumSizes);
 }
 
+OMPCountsClause *OMPCountsClause::Create(const ASTContext &C,
+                                         SourceLocation StartLoc,
+                                         SourceLocation LParenLoc,
+                                         SourceLocation EndLoc,
+                                         ArrayRef<Expr *> Counts) {
+  OMPCountsClause *Clause = CreateEmpty(C, Counts.size());
+  Clause->setLocStart(StartLoc);
+  Clause->setLParenLoc(LParenLoc);
+  Clause->setLocEnd(EndLoc);
+  Clause->setCountsRefs(Counts);
+  return Clause;
+}
+
+OMPCountsClause *OMPCountsClause::CreateEmpty(const ASTContext &C,
+                                              unsigned NumCounts) {
+  void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(NumCounts));
+  return new (Mem) OMPCountsClause(NumCounts);
+}
+
 OMPPermutationClause *OMPPermutationClause::Create(const ASTContext &C,
                                                    SourceLocation StartLoc,
                                                    SourceLocation LParenLoc,
@@ -1984,6 +2003,18 @@ void OMPClausePrinter::VisitOMPSizesClause(OMPSizesClause *Node) {
   OS << ")";
 }
 
+void OMPClausePrinter::VisitOMPCountsClause(OMPCountsClause *Node) {
+  OS << "counts(";
+  bool First = true;
+  for (auto *Count : Node->getCountsRefs()) {
+    if (!First)
+      OS << ", ";
+    Count->printPretty(OS, nullptr, Policy, 0);
+    First = false;
+  }
+  OS << ")";
+}
+
 void OMPClausePrinter::VisitOMPPermutationClause(OMPPermutationClause *Node) {
   OS << "permutation(";
   llvm::interleaveComma(Node->getArgsRefs(), OS, [&](const Expr *E) {
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 6c939cf7f9aeb..9d6b315effb41 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -554,11 +554,11 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
 
 OMPSplitDirective *
 OMPSplitDirective::Create(const ASTContext &C, SourceLocation StartLoc,
-                          SourceLocation EndLoc, Stmt *AssociatedStmt,
-                          unsigned NumLoops, Stmt *TransformedStmt,
-                          Stmt *PreInits) {
+                          SourceLocation EndLoc, ArrayRef<OMPClause *> Clauses,
+                          unsigned NumLoops, Stmt *AssociatedStmt,
+                          Stmt *TransformedStmt, Stmt *PreInits) {
   OMPSplitDirective *Dir = createDirective<OMPSplitDirective>(
-      C, {}, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc,
+      C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc,
       NumLoops);
   Dir->setTransformedStmt(TransformedStmt);
   Dir->setPreInits(PreInits);
@@ -566,10 +566,11 @@ OMPSplitDirective::Create(const ASTContext &C, SourceLocation StartLoc,
 }
 
 OMPSplitDirective *OMPSplitDirective::CreateEmpty(const ASTContext &C,
+                                                  unsigned NumClauses,
                                                   unsigned NumLoops) {
   return createEmptyDirective<OMPSplitDirective>(
-      C, /*NumClauses=*/0, /*HasAssociatedStmt=*/true,
-      TransformedStmtOffset + 1, SourceLocation(), SourceLocation(), NumLoops);
+      C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1,
+      SourceLocation(), SourceLocation(), NumLoops);
 }
 
 OMPFuseDirective *OMPFuseDirective::Create(
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index ef01943f11ca5..287eb217ba458 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -256,6 +256,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str,
   case OMPC_safelen:
   case OMPC_simdlen:
   case OMPC_sizes:
+  case OMPC_counts:
   case OMPC_permutation:
   case OMPC_allocator:
   case OMPC_collapse:
@@ -635,6 +636,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind,
   case OMPC_safelen:
   case OMPC_simdlen:
   case OMPC_sizes:
+  case OMPC_counts:
   case OMPC_permutation:
   case OMPC_allocator:
   case OMPC_collapse:
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 7e1a232d6bad7..0f2074c549c83 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -142,6 +142,7 @@ def OMPC_CopyPrivate : Clause<[Spelling<"copyprivate">]> {
   let flangClass = "OmpObjectList";
 }
 def OMPC_Counts : Clause<[Spelling<"counts">]> {
+  let clangClass = "OMPCountsClause";
 }
 def OMPC_Default : Clause<[Spelling<"default">]> {
   let clangClass = "OMPDefaultClause";
@@ -1426,7 +1427,12 @@ def OMP_Stripe : Directive<[Spelling<"stripe">]> {
   let category = CA_Executable;
 }
 def OMP_Split : Directive<[Spelling<"split">]> {
-  // TODO: Add counts clause support (OMPC_Counts)
+  let allowedOnceClauses = [
+    VersionedClause<OMPC_Counts, 60>,
+  ];
+  let requiredClauses = [
+    VersionedClause<OMPC_Counts, 60>,
+  ];
   let association = AS_LoopNest;
   let category = CA_Executable;
 }

>From e8d84da9f12c17905439346342f2b7eafebf3549 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Thu, 19 Mar 2026 04:33:51 -0400
Subject: [PATCH 11/32] parse & sema

---
 clang/include/clang/AST/RecursiveASTVisitor.h |   7 +
 clang/include/clang/Parse/Parser.h            |   3 +
 clang/include/clang/Sema/SemaOpenMP.h         |   8 +-
 clang/lib/Parse/ParseOpenMP.cpp               |  24 +-
 clang/lib/Sema/SemaOpenMP.cpp                 | 278 +++++++++---------
 clang/lib/Sema/TreeTransform.h                |  29 ++
 6 files changed, 205 insertions(+), 144 deletions(-)

diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 0802871c4a720..1a14dd2c666b5 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3506,6 +3506,13 @@ bool RecursiveASTVisitor<Derived>::VisitOMPSizesClause(OMPSizesClause *C) {
   return true;
 }
 
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPCountsClause(OMPCountsClause *C) {
+  for (Expr *E : C->getCountsRefs())
+    TRY_TO(TraverseStmt(E));
+  return true;
+}
+
 template <typename Derived>
 bool RecursiveASTVisitor<Derived>::VisitOMPPermutationClause(
     OMPPermutationClause *C) {
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 08a3d88ee6a36..bd313d37cc4b5 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -6812,6 +6812,9 @@ class Parser : public CodeCompletionHandler {
   /// Parses the 'sizes' clause of a '#pragma omp tile' directive.
   OMPClause *ParseOpenMPSizesClause();
 
+  /// Parses the 'counts' clause of a '#pragma omp split' directive.
+  OMPClause *ParseOpenMPCountsClause();
+
   /// Parses the 'permutation' clause of a '#pragma omp interchange' directive.
   OMPClause *ParseOpenMPPermutationClause();
 
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index 57382557fd13f..c4c1b2ad33f71 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -459,7 +459,8 @@ class SemaOpenMP : public SemaBase {
                                          SourceLocation EndLoc);
   /// Called on well-formed '#pragma omp split' after parsing of its
   /// associated statement.
-  StmtResult ActOnOpenMPSplitDirective(Stmt *AStmt, SourceLocation StartLoc,
+  StmtResult ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
+                                       Stmt *AStmt, SourceLocation StartLoc,
                                        SourceLocation EndLoc);
   /// Called on well-formed '#pragma omp interchange' after parsing of its
   /// clauses and the associated statement.
@@ -915,6 +916,11 @@ class SemaOpenMP : public SemaBase {
                                     SourceLocation StartLoc,
                                     SourceLocation LParenLoc,
                                     SourceLocation EndLoc);
+  /// Called on well-formed 'counts' clause after parsing its arguments.
+  OMPClause *ActOnOpenMPCountsClause(ArrayRef<Expr *> CountExprs,
+                                     SourceLocation StartLoc,
+                                     SourceLocation LParenLoc,
+                                     SourceLocation EndLoc);
   /// Called on well-form 'permutation' clause after parsing its arguments.
   OMPClause *ActOnOpenMPPermutationClause(ArrayRef<Expr *> PermExprs,
                                           SourceLocation StartLoc,
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index a0783a3737ad8..65345462ae740 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -2424,6 +2424,10 @@ StmtResult Parser::ParseOpenMPExecutableDirective(
     Diag(Loc, diag::err_omp_required_clause)
         << getOpenMPDirectiveName(DKind, OMPVersion) << "sizes";
   }
+  if (DKind == OMPD_split && !SeenClauses[unsigned(OMPC_counts)]) {
+    Diag(Loc, diag::err_omp_required_clause)
+        << getOpenMPDirectiveName(DKind, OMPVersion) << "counts";
+  }
 
   StmtResult AssociatedStmt;
   if (HasAssociatedStatement) {
@@ -2986,6 +2990,17 @@ OMPClause *Parser::ParseOpenMPSizesClause() {
                                                  OpenLoc, CloseLoc);
 }
 
+OMPClause *Parser::ParseOpenMPCountsClause() {
+  SourceLocation ClauseNameLoc, OpenLoc, CloseLoc;
+  SmallVector<Expr *, 4> ValExprs;
+  if (ParseOpenMPExprListClause(OMPC_counts, ClauseNameLoc, OpenLoc, CloseLoc,
+                                ValExprs))
+    return nullptr;
+
+  return Actions.OpenMP().ActOnOpenMPCountsClause(ValExprs, ClauseNameLoc,
+                                                  OpenLoc, CloseLoc);
+}
+
 OMPClause *Parser::ParseOpenMPLoopRangeClause() {
   SourceLocation ClauseNameLoc = ConsumeToken();
   SourceLocation FirstLoc, CountLoc;
@@ -3433,8 +3448,13 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
     Clause = ParseOpenMPPermutationClause();
     break;
   case OMPC_counts:
-    // TODO: Implement ParseOpenMPCountsClause() - not yet worked on
-    SkipUntil(tok::r_paren, tok::comma, StopBeforeMatch);
+    if (!FirstClause) {
+      Diag(Tok, diag::err_omp_more_one_clause)
+          << getOpenMPDirectiveName(DKind, OMPVersion)
+          << getOpenMPClauseName(CKind) << 0;
+      ErrorFound = true;
+    }
+    Clause = ParseOpenMPCountsClause();
     break;
   case OMPC_uses_allocators:
     Clause = ParseOpenMPUsesAllocatorClause(DKind);
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 9358316132ca4..67466f5ad5f8f 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -6467,13 +6467,15 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
            "reverse directive does not support any clauses");
     Res = ActOnOpenMPReverseDirective(AStmt, StartLoc, EndLoc);
     break;
-  case OMPD_split:
-    // TODO: Add counts clause support - not yet worked on
-    // Currently only supports basic split without clauses.
-    assert(ClausesWithImplicit.empty() &&
-           "split directive does not support any clauses");
-    Res = ActOnOpenMPSplitDirective(AStmt, StartLoc, EndLoc);
+  case OMPD_split: {
+    const OMPCountsClause *CountsClause =
+        OMPExecutableDirective::getSingleClause<OMPCountsClause>(
+            ClausesWithImplicit);
+    assert(CountsClause && "split directive requires counts clause");
+    Res =
+        ActOnOpenMPSplitDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc);
     break;
+  }
   case OMPD_interchange:
     Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc,
                                           EndLoc);
@@ -15915,7 +15917,12 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt,
                                      buildPreInits(Context, PreInits));
 }
 
-StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(Stmt *AStmt,
+/// Build the AST for \#pragma omp split counts(c1, c2, ...).
+///
+/// Splits the single associated loop into N consecutive loops, where N is the
+/// number of count expressions.
+StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
+                                                 Stmt *AStmt,
                                                  SourceLocation StartLoc,
                                                  SourceLocation EndLoc) {
   ASTContext &Context = getASTContext();
@@ -15925,6 +15932,12 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(Stmt *AStmt,
   if (!AStmt)
     return StmtError();
 
+  const OMPCountsClause *CountsClause =
+      OMPExecutableDirective::getSingleClause<OMPCountsClause>(Clauses);
+  if (!CountsClause)
+    return StmtError();
+
+  // Split applies to a single loop; check it is transformable and get helpers.
   constexpr unsigned NumLoops = 1;
   Stmt *Body = nullptr;
   SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers(
@@ -15937,8 +15950,8 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(Stmt *AStmt,
   // Delay applying the transformation to when template is completely
   // instantiated.
   if (SemaRef.CurContext->isDependentContext())
-    return OMPSplitDirective::Create(Context, StartLoc, EndLoc, AStmt, NumLoops,
-                                     nullptr, nullptr);
+    return OMPSplitDirective::Create(Context, StartLoc, EndLoc, Clauses,
+                                     NumLoops, AStmt, nullptr, nullptr);
 
   assert(LoopHelpers.size() == NumLoops &&
          "Expecting a single-dimensional loop iteration space");
@@ -15954,6 +15967,8 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(Stmt *AStmt,
   SmallVector<Stmt *> PreInits;
   addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits);
 
+  // Type and name of the original loop variable; we create one IV per segment
+  // and assign it to the original var so the body sees the same name.
   auto *IterationVarRef = cast<DeclRefExpr>(LoopHelper.IterationVarRef);
   QualType IVTy = IterationVarRef->getType();
   uint64_t IVWidth = Context.getTypeSize(IVTy);
@@ -15963,153 +15978,109 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(Stmt *AStmt,
   SourceLocation OrigVarLoc = OrigVar->getExprLoc();
   SourceLocation OrigVarLocBegin = OrigVar->getBeginLoc();
   SourceLocation OrigVarLocEnd = OrigVar->getEndLoc();
-
-  // Locations pointing to the transformation.
-  SourceLocation TransformLoc = StartLoc;
-
   // Internal variable names.
   std::string OrigVarName = OrigVar->getNameInfo().getAsString();
 
-  // For Subexpressions with more than one use, we define a lambda
-  // that creates a new AST node at every use.
-  CaptureVars CopyTransformer(SemaRef);
-  auto MakeNumIterations = [&CopyTransformer, &LoopHelper]() -> Expr * {
-    return AssertSuccess(
-        CopyTransformer.TransformExpr(LoopHelper.NumIterations));
-  };
-
-  // For split, we currently divide the loop into two equal parts.
-  // First loop: i = 0; i < n/2; ++i
-  // Second loop: i = n/2; i < n; ++i
-  // TODO: Add counts clause support - not yet worked on
-
-  // Create iteration variable for the first split loop.
-  SmallString<64> FirstIVName(".split.first.iv.");
-  FirstIVName += OrigVarName;
-  VarDecl *FirstIVDecl =
-      buildVarDecl(SemaRef, {}, IVTy, FirstIVName, nullptr, OrigVar);
-  auto MakeFirstRef = [&SemaRef = this->SemaRef, FirstIVDecl, IVTy,
-                       OrigVarLoc]() {
-    return buildDeclRefExpr(SemaRef, FirstIVDecl, IVTy, OrigVarLoc);
-  };
-
-  // Create iteration variable for the second split loop.
-  SmallString<64> SecondIVName(".split.second.iv.");
-  SecondIVName += OrigVarName;
-  VarDecl *SecondIVDecl =
-      buildVarDecl(SemaRef, {}, IVTy, SecondIVName, nullptr, OrigVar);
-  auto MakeSecondRef = [&SemaRef = this->SemaRef, SecondIVDecl, IVTy,
-                        OrigVarLoc]() {
-    return buildDeclRefExpr(SemaRef, SecondIVDecl, IVTy, OrigVarLoc);
-  };
-
-  // Create n/2 expression for the split point.
-  auto *Two = IntegerLiteral::Create(Context, llvm::APInt(IVWidth, 2), IVTy,
-                                     TransformLoc);
-  ExprResult HalfIterations = SemaRef.BuildBinOp(CurScope, TransformLoc, BO_Div,
-                                                 MakeNumIterations(), Two);
-  if (!HalfIterations.isUsable())
-    return StmtError();
-
-  // First loop: init-statement: i = 0
-  auto *Zero = IntegerLiteral::Create(Context, llvm::APInt::getZero(IVWidth),
-                                      FirstIVDecl->getType(), OrigVarLoc);
-  SemaRef.AddInitializerToDecl(FirstIVDecl, Zero, /*DirectInit=*/false);
-  StmtResult FirstInit = new (Context)
-      DeclStmt(DeclGroupRef(FirstIVDecl), OrigVarLocBegin, OrigVarLocEnd);
-  if (!FirstInit.isUsable())
-    return StmtError();
+  // Collect constant count values from the counts clause
+  SmallVector<uint64_t, 4> CountValues;
+  for (Expr *CountExpr : CountsClause->getCountsRefs()) {
+    if (!CountExpr) {
+      return OMPSplitDirective::Create(Context, StartLoc, EndLoc, Clauses,
+                                       NumLoops, AStmt, nullptr, nullptr);
+    }
+    std::optional<llvm::APSInt> OptVal =
+        CountExpr->getIntegerConstantExpr(Context);
+    if (!OptVal || OptVal->isNegative()) {
+      return OMPSplitDirective::Create(Context, StartLoc, EndLoc, Clauses,
+                                       NumLoops, AStmt, nullptr, nullptr);
+    }
+    CountValues.push_back(OptVal->getZExtValue());
+  }
 
-  // First loop: cond-expression (i < n/2)
-  ExprResult FirstCond =
-      SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT,
-                         MakeFirstRef(), HalfIterations.get());
-  if (!FirstCond.isUsable())
+  if (CountValues.empty()) {
+    Diag(CountsClause->getBeginLoc(), diag::err_omp_unexpected_clause_value)
+        << "at least one non-negative integer expression" << "counts";
     return StmtError();
+  }
 
-  // First loop: incr-statement (++i)
-  ExprResult FirstIncr = SemaRef.BuildUnaryOp(
-      CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, MakeFirstRef());
-  if (!FirstIncr.isUsable())
-    return StmtError();
+  // Cumulative segment starts: Starts[0]=0,
+  // Starts[j]=Starts[j-1]+CountValues[j-1]. Example: CountValues [3,5,2] →
+  // Starts [0,3,8,10]. Segment k runs [Starts[k], Starts[k+1]).
+  SmallVector<uint64_t, 4> Starts;
+  Starts.push_back(0);
+  for (size_t j = 0; j < CountValues.size(); ++j)
+    Starts.push_back(Starts.back() + CountValues[j]);
+
+  size_t NumSegments = CountValues.size();
+  SmallVector<Stmt *, 4> SplitLoops;
+
+  for (size_t Seg = 0; Seg < NumSegments; ++Seg) {
+    uint64_t StartVal = Starts[Seg];
+    uint64_t EndVal = Starts[Seg + 1];
+
+    // Segment IV: .split.iv.<Seg>.<OrigVarName>, init to StartVal, bound by
+    // EndVal.
+    SmallString<64> IVName(".split.iv.");
+    IVName += Twine(Seg).str();
+    IVName += ".";
+    IVName += OrigVarName;
+    VarDecl *IVDecl = buildVarDecl(SemaRef, {}, IVTy, IVName, nullptr, OrigVar);
+    auto MakeIVRef = [&SemaRef = this->SemaRef, IVDecl, IVTy, OrigVarLoc]() {
+      return buildDeclRefExpr(SemaRef, IVDecl, IVTy, OrigVarLoc);
+    };
 
-  // First loop: body - update original variable and execute body
-  // We need to create a copy of LoopHelper.Updates that uses FirstIV instead
-  // of the iteration variable. For now, use a simpler approach: directly
-  // assign the first IV to the original variable.
-  SmallVector<Stmt *, 4> FirstBodyStmts;
-  // Create update statement: origVar = .split.first.iv
-  // We'll use a BinaryOperator for assignment
-  ExprResult FirstUpdateExpr = SemaRef.BuildBinOp(
-      CurScope, OrigVarLoc, BO_Assign, OrigVar, MakeFirstRef());
-  if (!FirstUpdateExpr.isUsable())
-    return StmtError();
-  FirstBodyStmts.push_back(FirstUpdateExpr.get());
-  if (auto *CXXRangeFor = dyn_cast<CXXForRangeStmt>(LoopStmt))
-    FirstBodyStmts.push_back(CXXRangeFor->getLoopVarStmt());
-  FirstBodyStmts.push_back(Body);
-  auto *FirstBody =
-      CompoundStmt::Create(Context, FirstBodyStmts, FPOptionsOverride(),
-                           Body->getBeginLoc(), Body->getEndLoc());
+    llvm::APInt StartAP(IVWidth, StartVal, /*isSigned=*/false);
+    llvm::APInt EndAP(IVWidth, EndVal, /*isSigned=*/false);
+    auto *StartLit = IntegerLiteral::Create(Context, StartAP, IVTy, OrigVarLoc);
+    auto *EndLit = IntegerLiteral::Create(Context, EndAP, IVTy, OrigVarLoc);
 
-  // Create first loop
-  auto *FirstLoop = new (Context)
-      ForStmt(Context, FirstInit.get(), FirstCond.get(), nullptr,
-              FirstIncr.get(), FirstBody, LoopHelper.Init->getBeginLoc(),
-              LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc());
+    SemaRef.AddInitializerToDecl(IVDecl, StartLit, /*DirectInit=*/false);
+    StmtResult InitStmt = new (Context)
+        DeclStmt(DeclGroupRef(IVDecl), OrigVarLocBegin, OrigVarLocEnd);
+    if (!InitStmt.isUsable())
+      return StmtError();
 
-  // Second loop: init-statement (i = n/2)
-  SemaRef.AddInitializerToDecl(SecondIVDecl, HalfIterations.get(),
-                               /*DirectInit=*/false);
-  StmtResult SecondInit = new (Context)
-      DeclStmt(DeclGroupRef(SecondIVDecl), OrigVarLocBegin, OrigVarLocEnd);
-  if (!SecondInit.isUsable())
-    return StmtError();
+    ExprResult CondExpr = SemaRef.BuildBinOp(
+        CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeIVRef(), EndLit);
+    if (!CondExpr.isUsable())
+      return StmtError();
 
-  // Second loop: cond-expression (i < n)
-  ExprResult SecondCond =
-      SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT,
-                         MakeSecondRef(), MakeNumIterations());
-  if (!SecondCond.isUsable())
-    return StmtError();
+    ExprResult IncrExpr = SemaRef.BuildUnaryOp(
+        CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, MakeIVRef());
+    if (!IncrExpr.isUsable())
+      return StmtError();
 
-  // Second loop: incr-statement (++i)
-  ExprResult SecondIncr = SemaRef.BuildUnaryOp(
-      CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, MakeSecondRef());
-  if (!SecondIncr.isUsable())
-    return StmtError();
+    // orig_var = IV so the original body sees the same variable.
+    ExprResult UpdateExpr = SemaRef.BuildBinOp(CurScope, OrigVarLoc, BO_Assign,
+                                               OrigVar, MakeIVRef());
+    if (!UpdateExpr.isUsable())
+      return StmtError();
 
-  // Second loop: body - update original variable and execute body
-  SmallVector<Stmt *, 4> SecondBodyStmts;
-  // Create update statement: origVar = .split.second.iv
-  ExprResult SecondUpdateExpr = SemaRef.BuildBinOp(
-      CurScope, OrigVarLoc, BO_Assign, OrigVar, MakeSecondRef());
-  if (!SecondUpdateExpr.isUsable())
-    return StmtError();
-  SecondBodyStmts.push_back(SecondUpdateExpr.get());
-  if (auto *CXXRangeFor = dyn_cast<CXXForRangeStmt>(LoopStmt))
-    SecondBodyStmts.push_back(CXXRangeFor->getLoopVarStmt());
-  SecondBodyStmts.push_back(Body);
-  auto *SecondBody =
-      CompoundStmt::Create(Context, SecondBodyStmts, FPOptionsOverride(),
-                           Body->getBeginLoc(), Body->getEndLoc());
+    SmallVector<Stmt *, 4> BodyStmts;
+    BodyStmts.push_back(UpdateExpr.get());
+    if (auto *CXXRangeFor = dyn_cast<CXXForRangeStmt>(LoopStmt))
+      BodyStmts.push_back(CXXRangeFor->getLoopVarStmt());
+    BodyStmts.push_back(Body);
 
-  // Create second loop
-  auto *SecondLoop = new (Context)
-      ForStmt(Context, SecondInit.get(), SecondCond.get(), nullptr,
-              SecondIncr.get(), SecondBody, LoopHelper.Init->getBeginLoc(),
-              LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc());
+    auto *LoopBody =
+        CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(),
+                             Body->getBeginLoc(), Body->getEndLoc());
 
-  // Combine both loops into a compound statement
-  SmallVector<Stmt *, 2> SplitLoops;
-  SplitLoops.push_back(FirstLoop);
-  SplitLoops.push_back(SecondLoop);
-  auto *SplitStmt =
-      CompoundStmt::Create(Context, SplitLoops, FPOptionsOverride(),
-                           FirstLoop->getBeginLoc(), SecondLoop->getEndLoc());
+    auto *For = new (Context)
+        ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr,
+                IncrExpr.get(), LoopBody, LoopHelper.Init->getBeginLoc(),
+                LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc());
+    // Append the for-loop built for this segment to SplitLoops.
+    SplitLoops.push_back(For);
+  }
+  // Combine all the loops into a compound statement
+  auto *SplitStmt = CompoundStmt::Create(
+      Context, SplitLoops, FPOptionsOverride(),
+      SplitLoops.front()->getBeginLoc(), SplitLoops.back()->getEndLoc());
 
-  return OMPSplitDirective::Create(Context, StartLoc, EndLoc, AStmt, NumLoops,
-                                   SplitStmt, buildPreInits(Context, PreInits));
+  return OMPSplitDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops,
+                                   AStmt, SplitStmt,
+                                   buildPreInits(Context, PreInits));
 }
 
 StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective(
@@ -18060,6 +18031,31 @@ OMPClause *SemaOpenMP::ActOnOpenMPSizesClause(ArrayRef<Expr *> SizeExprs,
                                 SanitizedSizeExprs);
 }
 
+OMPClause *SemaOpenMP::ActOnOpenMPCountsClause(ArrayRef<Expr *> CountExprs,
+                                               SourceLocation StartLoc,
+                                               SourceLocation LParenLoc,
+                                               SourceLocation EndLoc) {
+  SmallVector<Expr *> SanitizedCountExprs(CountExprs);
+
+  for (Expr *&CountExpr : SanitizedCountExprs) {
+    if (!CountExpr)
+      continue;
+
+    bool IsValid = isNonNegativeIntegerValue(CountExpr, SemaRef, OMPC_counts,
+                                             /*StrictlyPositive=*/false);
+
+    QualType CountTy = CountExpr->getType();
+    if (!CountTy->isIntegerType())
+      IsValid = false;
+
+    if (!CountExpr->isInstantiationDependent() && !IsValid)
+      CountExpr = nullptr;
+  }
+
+  return OMPCountsClause::Create(getASTContext(), StartLoc, LParenLoc, EndLoc,
+                                 SanitizedCountExprs);
+}
+
 OMPClause *SemaOpenMP::ActOnOpenMPPermutationClause(ArrayRef<Expr *> PermExprs,
                                                     SourceLocation StartLoc,
                                                     SourceLocation LParenLoc,
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index b22c500048854..bc78f64f0e095 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1769,6 +1769,14 @@ class TreeTransform {
                                                      EndLoc);
   }
 
+  OMPClause *RebuildOMPCountsClause(ArrayRef<Expr *> Counts,
+                                    SourceLocation StartLoc,
+                                    SourceLocation LParenLoc,
+                                    SourceLocation EndLoc) {
+    return getSema().OpenMP().ActOnOpenMPCountsClause(Counts, StartLoc,
+                                                      LParenLoc, EndLoc);
+  }
+
   /// Build a new OpenMP 'permutation' clause.
   OMPClause *RebuildOMPPermutationClause(ArrayRef<Expr *> PermExprs,
                                          SourceLocation StartLoc,
@@ -10626,6 +10634,27 @@ OMPClause *TreeTransform<Derived>::TransformOMPSizesClause(OMPSizesClause *C) {
                                C->getLParenLoc(), C->getEndLoc());
 }
 
+template <typename Derived>
+OMPClause *
+TreeTransform<Derived>::TransformOMPCountsClause(OMPCountsClause *C) {
+  SmallVector<Expr *, 4> TransformedCounts;
+  TransformedCounts.reserve(C->getNumCounts());
+  for (Expr *E : C->getCountsRefs()) {
+    if (!E) {
+      TransformedCounts.push_back(nullptr);
+      continue;
+    }
+
+    ExprResult T = getDerived().TransformExpr(E);
+    if (T.isInvalid())
+      return nullptr;
+    TransformedCounts.push_back(T.get());
+  }
+
+  return RebuildOMPCountsClause(TransformedCounts, C->getBeginLoc(),
+                                C->getLParenLoc(), C->getEndLoc());
+}
+
 template <typename Derived>
 OMPClause *
 TreeTransform<Derived>::TransformOMPPermutationClause(OMPPermutationClause *C) {

>From 6f37438ae3a9aca091fd193f627cb68f18847fdf Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Thu, 19 Mar 2026 04:34:58 -0400
Subject: [PATCH 12/32] serialisation

---
 clang/lib/AST/StmtProfile.cpp             |  6 ++++++
 clang/lib/Serialization/ASTReader.cpp     | 11 +++++++++++
 clang/lib/Serialization/ASTReaderStmt.cpp |  5 ++---
 clang/lib/Serialization/ASTWriter.cpp     |  7 +++++++
 clang/tools/libclang/CIndex.cpp           |  5 +++++
 5 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 6f929c060ebe7..c75652e5c1dd3 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -498,6 +498,12 @@ void OMPClauseProfiler::VisitOMPSizesClause(const OMPSizesClause *C) {
       Profiler->VisitExpr(E);
 }
 
+void OMPClauseProfiler::VisitOMPCountsClause(const OMPCountsClause *C) {
+  for (auto *E : C->getCountsRefs())
+    if (E)
+      Profiler->VisitExpr(E);
+}
+
 void OMPClauseProfiler::VisitOMPPermutationClause(
     const OMPPermutationClause *C) {
   for (Expr *E : C->getArgsRefs())
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 4ecdb563a1de2..10f5e106b9d97 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -11437,6 +11437,11 @@ OMPClause *OMPClauseReader::readClause() {
     C = OMPSizesClause::CreateEmpty(Context, NumSizes);
     break;
   }
+  case llvm::omp::OMPC_counts: {
+    unsigned NumCounts = Record.readInt();
+    C = OMPCountsClause::CreateEmpty(Context, NumCounts);
+    break;
+  }
   case llvm::omp::OMPC_permutation: {
     unsigned NumLoops = Record.readInt();
     C = OMPPermutationClause::CreateEmpty(Context, NumLoops);
@@ -11850,6 +11855,12 @@ void OMPClauseReader::VisitOMPSizesClause(OMPSizesClause *C) {
   C->setLParenLoc(Record.readSourceLocation());
 }
 
+void OMPClauseReader::VisitOMPCountsClause(OMPCountsClause *C) {
+  for (Expr *&E : C->getCountsRefs())
+    E = Record.readSubExpr();
+  C->setLParenLoc(Record.readSourceLocation());
+}
+
 void OMPClauseReader::VisitOMPPermutationClause(OMPPermutationClause *C) {
   for (Expr *&E : C->getArgsRefs())
     E = Record.readSubExpr();
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 79a6a03b3f276..fb81e4fefdebb 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -3693,9 +3693,8 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
 
     case STMT_OMP_SPLIT_DIRECTIVE: {
       unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
-      assert(Record[ASTStmtReader::NumStmtFields + 1] == 0 &&
-             "Split directive has no clauses");
-      S = OMPSplitDirective::CreateEmpty(Context, NumLoops);
+      unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];
+      S = OMPSplitDirective::CreateEmpty(Context, NumClauses, NumLoops);
       break;
     }
 
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 20a01f86e95ac..e10e402b1cbf8 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -8070,6 +8070,13 @@ void OMPClauseWriter::VisitOMPSizesClause(OMPSizesClause *C) {
   Record.AddSourceLocation(C->getLParenLoc());
 }
 
+void OMPClauseWriter::VisitOMPCountsClause(OMPCountsClause *C) {
+  Record.push_back(C->getNumCounts());
+  for (Expr *Count : C->getCountsRefs())
+    Record.AddStmt(Count);
+  Record.AddSourceLocation(C->getLParenLoc());
+}
+
 void OMPClauseWriter::VisitOMPPermutationClause(OMPPermutationClause *C) {
   Record.push_back(C->getNumLoops());
   for (Expr *Size : C->getArgsRefs())
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 9016bc6e6e418..f1532d0b064b3 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2362,6 +2362,11 @@ void OMPClauseEnqueue::VisitOMPSizesClause(const OMPSizesClause *C) {
     Visitor->AddStmt(E);
 }
 
+void OMPClauseEnqueue::VisitOMPCountsClause(const OMPCountsClause *C) {
+  for (auto E : C->getCountsRefs())
+    Visitor->AddStmt(E);
+}
+
 void OMPClauseEnqueue::VisitOMPPermutationClause(
     const OMPPermutationClause *C) {
   for (auto E : C->getArgsRefs())

>From e56bb75562377aaa929555f2ceafb57594083221 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Thu, 19 Mar 2026 04:36:32 -0400
Subject: [PATCH 13/32] basic_test

---
 clang/test/OpenMP/split_ast_print.cpp   |  5 +--
 clang/test/OpenMP/split_counts_verify.c | 44 +++++++++++++++++++++++++
 clang/test/OpenMP/split_simple_test.c   | 15 +++++----
 3 files changed, 56 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/OpenMP/split_counts_verify.c

diff --git a/clang/test/OpenMP/split_ast_print.cpp b/clang/test/OpenMP/split_ast_print.cpp
index b24eae4a9bead..07dd7b28e1a13 100644
--- a/clang/test/OpenMP/split_ast_print.cpp
+++ b/clang/test/OpenMP/split_ast_print.cpp
@@ -16,9 +16,10 @@ extern "C" void body(...);
 // PRINT-LABEL: void foo(
 // DUMP-LABEL:  FunctionDecl {{.*}} foo
 void foo(int n) {
-  // PRINT:     #pragma omp split
+  // PRINT:     #pragma omp split counts(2, 3)
   // DUMP:      OMPSplitDirective
-  #pragma omp split
+  // DUMP: OMPCountsClause
+  #pragma omp split counts(2, 3)
   // PRINT: for (int i = 0; i < n; ++i)
   // DUMP:      ForStmt
   for (int i = 0; i < n; ++i)
diff --git a/clang/test/OpenMP/split_counts_verify.c b/clang/test/OpenMP/split_counts_verify.c
new file mode 100644
index 0000000000000..4b2ec2ca20bcd
--- /dev/null
+++ b/clang/test/OpenMP/split_counts_verify.c
@@ -0,0 +1,44 @@
+/*
+ * Verify #pragma omp split counts(c1, c2, ...) at AST, IR, and runtime.
+ * counts(3, 5, 2) splits 10 iterations into: [0..3), [3..8), [8..10).
+ * Sum 0+1+...+9 = 45.
+ */
+// REQUIRES: x86-registered-target
+
+// 1) Syntax and semantics only
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+// 2) AST dump should show OMPSplitDirective with OMPCountsClause node.
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-dump %s 2>&1 | FileCheck %s --check-prefix=AST
+
+// 3) Emit LLVM: three sequential loops (multiple phi/br for loop structure)
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=60 -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=IR
+
+// 4) Compile and run: exit 0 if sum == 45
+// RUN: %clang -fopenmp -fopenmp-version=60 -O0 %s -o %t.exe
+// RUN: %t.exe
+
+int main(void) {
+  const int n = 10;
+  int sum = 0;
+
+#pragma omp split counts(3, 5, 2)
+  for (int i = 0; i < n; ++i) {
+    sum += i;
+  }
+
+  return (sum == 45) ? 0 : 1;
+}
+
+// AST: OMPSplitDirective
+// AST: OMPCountsClause
+
+// IR: define
+// IR: .split.iv.0
+// IR: icmp slt i32 {{.*}}, 3
+// IR: .split.iv.1
+// IR: icmp slt i32 {{.*}}, 8
+// IR: .split.iv.2
+// IR: icmp slt i32 {{.*}}, 10
+// IR: icmp eq i32 {{.*}}, 45
diff --git a/clang/test/OpenMP/split_simple_test.c b/clang/test/OpenMP/split_simple_test.c
index 62dbc1cd861e5..021759e6a70e6 100644
--- a/clang/test/OpenMP/split_simple_test.c
+++ b/clang/test/OpenMP/split_simple_test.c
@@ -1,16 +1,15 @@
 /*
- * Simple test for #pragma omp split: one canonical for-loop is transformed
- * into two loops (first half and second half of iterations).
+ * Simple test for #pragma omp split counts: one for-loop is transformed
+ * into two loops (counts(5, 5) => [0..5) and [5..10)).
  */
 // Verify the split directive compiles and emits IR (two sequential loops).
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown
-// -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
 
 int main(void) {
   const int n = 10;
   int sum = 0;
 
-#pragma omp split
+#pragma omp split counts(5, 5)
   for (int i = 0; i < n; ++i) {
     sum += i;
   }
@@ -20,5 +19,9 @@ int main(void) {
 
 // CHECK: define
 // CHECK: load
-// Split produces two sequential loops; ensure we have loop structure in IR.
+// Split produces two sequential loops (counts(5, 5) => bounds 5, 10).
+// CHECK: .split.iv
+// CHECK: icmp slt i32 {{.*}}, 5
+// CHECK: .split.iv
+// CHECK: icmp slt i32 {{.*}}, 10
 // CHECK: br i1

>From bb2d2ce0de05a544020754d44eb5dfd1b74d1805 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Thu, 19 Mar 2026 06:49:54 -0400
Subject: [PATCH 14/32] test_fix

---
 clang/test/OpenMP/split_counts_verify.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/clang/test/OpenMP/split_counts_verify.c b/clang/test/OpenMP/split_counts_verify.c
index 4b2ec2ca20bcd..3eec02f974e07 100644
--- a/clang/test/OpenMP/split_counts_verify.c
+++ b/clang/test/OpenMP/split_counts_verify.c
@@ -15,9 +15,6 @@
 // 3) Emit LLVM: three sequential loops (multiple phi/br for loop structure)
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=60 -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=IR
 
-// 4) Compile and run: exit 0 if sum == 45
-// RUN: %clang -fopenmp -fopenmp-version=60 -O0 %s -o %t.exe
-// RUN: %t.exe
 
 int main(void) {
   const int n = 10;

>From 2d9f24417e72216a8b795d2a8604eb18d3203ded Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Fri, 20 Mar 2026 04:36:15 -0400
Subject: [PATCH 15/32] revised

---
 clang/include/clang/AST/StmtOpenMP.h  |  2 +-
 clang/lib/AST/OpenMPClause.cpp        | 10 +++-------
 clang/lib/CodeGen/CGStmtOpenMP.cpp    |  2 +-
 clang/lib/Sema/SemaOpenMP.cpp         | 15 ++++-----------
 clang/test/OpenMP/split_ast_print.cpp |  2 ++
 5 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index bdaf73c0a6607..3d8962afa2b7c 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -6067,7 +6067,7 @@ class OMPFuseDirective final
 
 /// Represents the '#pragma omp split' loop transformation directive.
 ///
-/// \code{c}
+/// \code{.c}
 ///   #pragma omp split
 ///   for (int i = 0; i < n; ++i)
 ///     ...
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index ab6fa1c673411..f6c03eba668c8 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -2005,13 +2005,9 @@ void OMPClausePrinter::VisitOMPSizesClause(OMPSizesClause *Node) {
 
 void OMPClausePrinter::VisitOMPCountsClause(OMPCountsClause *Node) {
   OS << "counts(";
-  bool First = true;
-  for (auto *Count : Node->getCountsRefs()) {
-    if (!First)
-      OS << ", ";
-    Count->printPretty(OS, nullptr, Policy, 0);
-    First = false;
-  }
+  llvm::interleaveComma(Node->getCountsRefs(), OS, [&](const Expr *E) {
+    E->printPretty(OS, nullptr, Policy, 0);
+  });
   OS << ")";
 }
 
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 2acced1f0da1a..59d0e6825a975 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3206,7 +3206,7 @@ void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) {
 }
 
 void CodeGenFunction::EmitOMPSplitDirective(const OMPSplitDirective &S) {
-  // Emit the de-sugared statement (the two split loops).
+  // Emit the de-sugared statement (the split loops).
   OMPTransformDirectiveScopeRAII SplitScope(*this, &S);
   EmitStmt(S.getTransformedStmt());
 }
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 67466f5ad5f8f..8b097694454cc 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -15984,24 +15984,19 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
   // Collect constant count values from the counts clause
   SmallVector<uint64_t, 4> CountValues;
   for (Expr *CountExpr : CountsClause->getCountsRefs()) {
-    if (!CountExpr) {
+    if (!CountExpr)
       return OMPSplitDirective::Create(Context, StartLoc, EndLoc, Clauses,
                                        NumLoops, AStmt, nullptr, nullptr);
-    }
     std::optional<llvm::APSInt> OptVal =
         CountExpr->getIntegerConstantExpr(Context);
-    if (!OptVal || OptVal->isNegative()) {
+    if (!OptVal || OptVal->isNegative())
       return OMPSplitDirective::Create(Context, StartLoc, EndLoc, Clauses,
                                        NumLoops, AStmt, nullptr, nullptr);
-    }
     CountValues.push_back(OptVal->getZExtValue());
   }
 
-  if (CountValues.empty()) {
-    Diag(CountsClause->getBeginLoc(), diag::err_omp_unexpected_clause_value)
-        << "at least one non-negative integer expression" << "counts";
+  if (CountValues.empty())
     return StmtError();
-  }
 
   // Cumulative segment starts: Starts[0]=0,
   // Starts[j]=Starts[j-1]+CountValues[j-1]. Example: CountValues [3,5,2] →
@@ -16021,9 +16016,7 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
     // Segment IV: .split.iv.<Seg>.<OrigVarName>, init to StartVal, bound by
     // EndVal.
     SmallString<64> IVName(".split.iv.");
-    IVName += Twine(Seg).str();
-    IVName += ".";
-    IVName += OrigVarName;
+    IVName += (Twine(Seg) + "." + OrigVarName).str();
     VarDecl *IVDecl = buildVarDecl(SemaRef, {}, IVTy, IVName, nullptr, OrigVar);
     auto MakeIVRef = [&SemaRef = this->SemaRef, IVDecl, IVTy, OrigVarLoc]() {
       return buildDeclRefExpr(SemaRef, IVDecl, IVTy, OrigVarLoc);
diff --git a/clang/test/OpenMP/split_ast_print.cpp b/clang/test/OpenMP/split_ast_print.cpp
index 07dd7b28e1a13..2d53f589500cd 100644
--- a/clang/test/OpenMP/split_ast_print.cpp
+++ b/clang/test/OpenMP/split_ast_print.cpp
@@ -15,6 +15,8 @@ extern "C" void body(...);
 
 // PRINT-LABEL: void foo(
 // DUMP-LABEL:  FunctionDecl {{.*}} foo
+// OpenMP spec: one counts item may be the keyword (e.g. omp_fill) when loop
+// bound is not constant; this test uses literal counts for ast-print check.
 void foo(int n) {
   // PRINT:     #pragma omp split counts(2, 3)
   // DUMP:      OMPSplitDirective

>From 0a02166b82a2603ec0c5cceefdffe4749d690f65 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Sat, 21 Mar 2026 10:08:20 -0400
Subject: [PATCH 16/32] omp_fill

---
 .../clang/Basic/DiagnosticSemaKinds.td        |   6 +
 clang/include/clang/Sema/SemaOpenMP.h         |   9 +
 clang/lib/AST/OpenMPClause.cpp                |   7 +
 clang/lib/Parse/ParseOpenMP.cpp               |  37 +++-
 clang/lib/Sema/SemaOpenMP.cpp                 | 162 +++++++++++++++---
 5 files changed, 198 insertions(+), 23 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 6b9fa4a257397..a8e0308195fc8 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11168,6 +11168,12 @@ def err_omp_bind_required_on_loop : Error<
   "construct">;
 def err_omp_loop_reduction_clause : Error<
   "'reduction' clause not allowed with '#pragma omp loop bind(teams)'">;
+def err_omp_split_counts_multiple_omp_fill : Error<
+  "at most one 'omp_fill' may appear in the 'counts' clause">;
+def err_omp_split_counts_omp_fill_not_last : Error<
+  "'omp_fill' must be the last item in the 'counts' clause">;
+def err_omp_split_counts_omp_fill_no_trip : Error<
+  "'omp_fill' requires a computable loop iteration count">;
 def warn_break_binds_to_switch : Warning<
   "'break' is bound to loop, GCC binds it to switch">,
   InGroup<GccCompat>;
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index c4c1b2ad33f71..11df5816d2b35 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -42,6 +42,7 @@ class FunctionScopeInfo;
 
 class DeclContext;
 class DeclGroupRef;
+class EnumConstantDecl;
 class ParsedAttr;
 class Scope;
 
@@ -921,6 +922,8 @@ class SemaOpenMP : public SemaBase {
                                      SourceLocation StartLoc,
                                      SourceLocation LParenLoc,
                                      SourceLocation EndLoc);
+  /// Build the OpenMP \c omp_fill placeholder for a \c counts clause.
+  ExprResult ActOnOpenMPCountsFillExpr(SourceLocation Loc);
   /// Called on well-form 'permutation' clause after parsing its arguments.
   OMPClause *ActOnOpenMPPermutationClause(ArrayRef<Expr *> PermExprs,
                                           SourceLocation StartLoc,
@@ -1646,6 +1649,12 @@ class SemaOpenMP : public SemaBase {
 
   /// Device number identifier specified by the context selector.
   StringRef DeviceNumID;
+
+  /// Implicit enumerator used to represent \c omp_fill in \c counts clauses.
+  EnumConstantDecl *OMPFillCountMarker = nullptr;
+
+  EnumConstantDecl *getOrCreateOMPFillCountMarker();
+  bool isOMPFillCountExpr(const Expr *E) const;
 };
 
 } // namespace clang
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index f6c03eba668c8..d68a53189ee65 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -15,6 +15,7 @@
 #include "clang/AST/Attr.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclOpenMP.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/ExprOpenMP.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/OpenMPKinds.h"
@@ -2006,6 +2007,12 @@ void OMPClausePrinter::VisitOMPSizesClause(OMPSizesClause *Node) {
 void OMPClausePrinter::VisitOMPCountsClause(OMPCountsClause *Node) {
   OS << "counts(";
   llvm::interleaveComma(Node->getCountsRefs(), OS, [&](const Expr *E) {
+    if (const auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts()))
+      if (const auto *ECD = dyn_cast<EnumConstantDecl>(DRE->getDecl()))
+        if (ECD->isImplicit() && ECD->getName() == "omp_fill") {
+          OS << "omp_fill";
+          return;
+        }
     E->printPretty(OS, nullptr, Policy, 0);
   });
   OS << ")";
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 65345462ae740..b5864baea600d 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -2993,9 +2993,42 @@ OMPClause *Parser::ParseOpenMPSizesClause() {
 OMPClause *Parser::ParseOpenMPCountsClause() {
   SourceLocation ClauseNameLoc, OpenLoc, CloseLoc;
   SmallVector<Expr *, 4> ValExprs;
-  if (ParseOpenMPExprListClause(OMPC_counts, ClauseNameLoc, OpenLoc, CloseLoc,
-                                ValExprs))
+
+  assert(getOpenMPClauseName(OMPC_counts) == PP.getSpelling(Tok) &&
+         "Expected parsing to start at clause name");
+  ClauseNameLoc = ConsumeToken();
+
+  BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
+  if (T.consumeOpen()) {
+    Diag(Tok, diag::err_expected) << tok::l_paren;
+    return nullptr;
+  }
+
+  do {
+    if (Tok.is(tok::identifier) &&
+        Tok.getIdentifierInfo()->getName() == "omp_fill") {
+      SourceLocation FillLoc = Tok.getLocation();
+      ConsumeToken();
+      ExprResult ER = Actions.OpenMP().ActOnOpenMPCountsFillExpr(FillLoc);
+      if (!ER.isUsable()) {
+        T.skipToEnd();
+        return nullptr;
+      }
+      ValExprs.push_back(ER.get());
+    } else {
+      ExprResult Val = ParseConstantExpression();
+      if (!Val.isUsable()) {
+        T.skipToEnd();
+        return nullptr;
+      }
+      ValExprs.push_back(Val.get());
+    }
+  } while (TryConsumeToken(tok::comma));
+
+  if (T.consumeClose())
     return nullptr;
+  OpenLoc = T.getOpenLocation();
+  CloseLoc = T.getCloseLocation();
 
   return Actions.OpenMP().ActOnOpenMPCountsClause(ValExprs, ClauseNameLoc,
                                                   OpenLoc, CloseLoc);
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 8b097694454cc..f20f8ae68cedb 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -6468,10 +6468,9 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
     Res = ActOnOpenMPReverseDirective(AStmt, StartLoc, EndLoc);
     break;
   case OMPD_split: {
-    const OMPCountsClause *CountsClause =
-        OMPExecutableDirective::getSingleClause<OMPCountsClause>(
-            ClausesWithImplicit);
-    assert(CountsClause && "split directive requires counts clause");
+    assert(OMPExecutableDirective::getSingleClause<OMPCountsClause>(
+               ClausesWithImplicit) &&
+           "split directive requires counts clause");
     Res =
         ActOnOpenMPSplitDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc);
     break;
@@ -15981,37 +15980,75 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
   // Internal variable names.
   std::string OrigVarName = OrigVar->getNameInfo().getAsString();
 
-  // Collect constant count values from the counts clause
-  SmallVector<uint64_t, 4> CountValues;
+  enum class SplitCountKind { Constant, Fill };
+  SmallVector<std::pair<SplitCountKind, uint64_t>, 4> Entries;
   for (Expr *CountExpr : CountsClause->getCountsRefs()) {
     if (!CountExpr)
       return OMPSplitDirective::Create(Context, StartLoc, EndLoc, Clauses,
                                        NumLoops, AStmt, nullptr, nullptr);
+    if (isOMPFillCountExpr(CountExpr)) {
+      Entries.push_back({SplitCountKind::Fill, 0});
+      continue;
+    }
     std::optional<llvm::APSInt> OptVal =
         CountExpr->getIntegerConstantExpr(Context);
     if (!OptVal || OptVal->isNegative())
       return OMPSplitDirective::Create(Context, StartLoc, EndLoc, Clauses,
                                        NumLoops, AStmt, nullptr, nullptr);
-    CountValues.push_back(OptVal->getZExtValue());
+    Entries.push_back({SplitCountKind::Constant, OptVal->getZExtValue()});
+  }
+
+  if (Entries.empty())
+    return StmtError();
+
+  unsigned NumFill = 0;
+  unsigned FillPos = 0;
+  for (unsigned I = 0; I < Entries.size(); ++I) {
+    if (Entries[I].first == SplitCountKind::Fill) {
+      ++NumFill;
+      FillPos = I;
+    }
+  }
+  if (NumFill > 1) {
+    Diag(CountsClause->getBeginLoc(),
+         diag::err_omp_split_counts_multiple_omp_fill);
+    return StmtError();
+  }
+  if (NumFill == 1 && FillPos != Entries.size() - 1) {
+    Diag(CountsClause->getBeginLoc(),
+         diag::err_omp_split_counts_omp_fill_not_last);
+    return StmtError();
   }
 
-  if (CountValues.empty())
+  Expr *NumIterExpr = LoopHelper.NumIterations;
+  if (NumFill == 1 && !NumIterExpr) {
+    Diag(CountsClause->getBeginLoc(),
+         diag::err_omp_split_counts_omp_fill_no_trip);
     return StmtError();
+  }
 
-  // Cumulative segment starts: Starts[0]=0,
-  // Starts[j]=Starts[j-1]+CountValues[j-1]. Example: CountValues [3,5,2] →
-  // Starts [0,3,8,10]. Segment k runs [Starts[k], Starts[k+1]).
-  SmallVector<uint64_t, 4> Starts;
-  Starts.push_back(0);
-  for (size_t j = 0; j < CountValues.size(); ++j)
-    Starts.push_back(Starts.back() + CountValues[j]);
+  struct SplitSeg {
+    uint64_t Start;
+    bool EndIsTripCount;
+    uint64_t EndConst;
+  };
+  SmallVector<SplitSeg, 4> Segs;
+  uint64_t Cur = 0;
+  for (unsigned I = 0; I < Entries.size(); ++I) {
+    const auto &Ent = Entries[I];
+    if (Ent.first == SplitCountKind::Constant) {
+      uint64_t Nxt = Cur + Ent.second;
+      Segs.push_back({Cur, false, Nxt});
+      Cur = Nxt;
+    } else
+      Segs.push_back({Cur, true, 0});
+  }
 
-  size_t NumSegments = CountValues.size();
+  size_t NumSegments = Segs.size();
   SmallVector<Stmt *, 4> SplitLoops;
 
   for (size_t Seg = 0; Seg < NumSegments; ++Seg) {
-    uint64_t StartVal = Starts[Seg];
-    uint64_t EndVal = Starts[Seg + 1];
+    uint64_t StartVal = Segs[Seg].Start;
 
     // Segment IV: .split.iv.<Seg>.<OrigVarName>, init to StartVal, bound by
     // EndVal.
@@ -16023,9 +16060,15 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
     };
 
     llvm::APInt StartAP(IVWidth, StartVal, /*isSigned=*/false);
-    llvm::APInt EndAP(IVWidth, EndVal, /*isSigned=*/false);
     auto *StartLit = IntegerLiteral::Create(Context, StartAP, IVTy, OrigVarLoc);
-    auto *EndLit = IntegerLiteral::Create(Context, EndAP, IVTy, OrigVarLoc);
+
+    Expr *EndBound = nullptr;
+    if (Segs[Seg].EndIsTripCount)
+      EndBound = NumIterExpr;
+    else {
+      llvm::APInt EndAP(IVWidth, Segs[Seg].EndConst, /*isSigned=*/false);
+      EndBound = IntegerLiteral::Create(Context, EndAP, IVTy, OrigVarLoc);
+    }
 
     SemaRef.AddInitializerToDecl(IVDecl, StartLit, /*DirectInit=*/false);
     StmtResult InitStmt = new (Context)
@@ -16034,7 +16077,7 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
       return StmtError();
 
     ExprResult CondExpr = SemaRef.BuildBinOp(
-        CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeIVRef(), EndLit);
+        CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeIVRef(), EndBound);
     if (!CondExpr.isUsable())
       return StmtError();
 
@@ -18024,6 +18067,81 @@ OMPClause *SemaOpenMP::ActOnOpenMPSizesClause(ArrayRef<Expr *> SizeExprs,
                                 SanitizedSizeExprs);
 }
 
+EnumConstantDecl *SemaOpenMP::getOrCreateOMPFillCountMarker() {
+  if (OMPFillCountMarker)
+    return OMPFillCountMarker;
+
+  ASTContext &Ctx = getASTContext();
+  TranslationUnitDecl *TU = Ctx.getTranslationUnitDecl();
+  Preprocessor &PP = SemaRef.PP;
+  IdentifierInfo *EnumII =
+      &PP.getIdentifierTable().get("__clang_omp_counts_fill_tag");
+  EnumDecl *ED = EnumDecl::Create(
+      Ctx, TU, SourceLocation{}, SourceLocation{}, EnumII, /*PrevDecl=*/nullptr,
+      /*IsScoped=*/false, /*IsScopedUsingClassTag=*/false, /*IsFixed=*/false);
+  ED->setImplicit(true);
+
+  QualType IntTy = Ctx.IntTy;
+  ED->setIntegerType(IntTy);
+  ED->setPromotionType(IntTy);
+
+  IdentifierInfo *FillII = &PP.getIdentifierTable().get("omp_fill");
+  llvm::APSInt Zero(Ctx.getIntWidth(IntTy),
+                    /*isUnsigned=*/!IntTy->isSignedIntegerType());
+  Zero.setIsSigned(IntTy->isSignedIntegerType());
+  IntegerLiteral *IL =
+      IntegerLiteral::Create(Ctx, Zero, IntTy, SourceLocation{});
+  EnumConstantDecl *ECD = EnumConstantDecl::Create(Ctx, ED, SourceLocation{},
+                                                   FillII, IntTy, IL, Zero);
+  ECD->setImplicit(true);
+
+  ED->addDecl(ECD);
+
+  llvm::SmallVector<Decl *, 1> Elements;
+  Elements.push_back(ECD);
+  unsigned NumNegativeBits = 0;
+  unsigned NumPositiveBits = 0;
+  Ctx.computeEnumBits(Elements, NumNegativeBits, NumPositiveBits);
+
+  ED->completeDefinition(IntTy, IntTy, NumPositiveBits, NumNegativeBits);
+
+  if (!SemaRef.getLangOpts().CPlusPlus)
+    ECD->setType(IntTy);
+  else {
+    QualType EnumTy = Ctx.getTagType(ElaboratedTypeKeyword::None,
+                                     /*Qualifier=*/std::nullopt, ED,
+                                     /*OwnsTag=*/false);
+    ECD->setType(EnumTy);
+  }
+
+  TU->addDecl(ED);
+
+  OMPFillCountMarker = ECD;
+  return ECD;
+}
+
+ExprResult SemaOpenMP::ActOnOpenMPCountsFillExpr(SourceLocation Loc) {
+  EnumConstantDecl *ECD = getOrCreateOMPFillCountMarker();
+  ASTContext &Ctx = getASTContext();
+  QualType T = ECD->getType();
+  return DeclRefExpr::Create(Ctx, NestedNameSpecifierLoc(), SourceLocation(),
+                             ECD, /*RefersToEnclosingVariableOrCapture=*/false,
+                             Loc, T, VK_PRValue, ECD);
+}
+
+bool SemaOpenMP::isOMPFillCountExpr(const Expr *E) const {
+  if (!E)
+    return false;
+  E = E->IgnoreParenImpCasts();
+  const auto *DRE = dyn_cast<DeclRefExpr>(E);
+  if (!DRE)
+    return false;
+  if (OMPFillCountMarker)
+    return DRE->getDecl() == OMPFillCountMarker;
+  const auto *ECD = dyn_cast<EnumConstantDecl>(DRE->getDecl());
+  return ECD && ECD->isImplicit() && ECD->getName() == "omp_fill";
+}
+
 OMPClause *SemaOpenMP::ActOnOpenMPCountsClause(ArrayRef<Expr *> CountExprs,
                                                SourceLocation StartLoc,
                                                SourceLocation LParenLoc,
@@ -18033,6 +18151,8 @@ OMPClause *SemaOpenMP::ActOnOpenMPCountsClause(ArrayRef<Expr *> CountExprs,
   for (Expr *&CountExpr : SanitizedCountExprs) {
     if (!CountExpr)
       continue;
+    if (isOMPFillCountExpr(CountExpr))
+      continue;
 
     bool IsValid = isNonNegativeIntegerValue(CountExpr, SemaRef, OMPC_counts,
                                              /*StrictlyPositive=*/false);

>From 1c1961a02bcd0ce4fe01244c3abda52d7a3c0aca Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Sat, 21 Mar 2026 10:09:51 -0400
Subject: [PATCH 17/32] omp_fill-test

---
 clang/test/OpenMP/split_ast_print.cpp   |  6 ++---
 clang/test/OpenMP/split_no_fill_print.c | 34 +++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/OpenMP/split_no_fill_print.c

diff --git a/clang/test/OpenMP/split_ast_print.cpp b/clang/test/OpenMP/split_ast_print.cpp
index 2d53f589500cd..4707d88e9328c 100644
--- a/clang/test/OpenMP/split_ast_print.cpp
+++ b/clang/test/OpenMP/split_ast_print.cpp
@@ -15,13 +15,11 @@ extern "C" void body(...);
 
 // PRINT-LABEL: void foo(
 // DUMP-LABEL:  FunctionDecl {{.*}} foo
-// OpenMP spec: one counts item may be the keyword (e.g. omp_fill) when loop
-// bound is not constant; this test uses literal counts for ast-print check.
 void foo(int n) {
-  // PRINT:     #pragma omp split counts(2, 3)
+  // PRINT:     #pragma omp split counts(2, omp_fill)
   // DUMP:      OMPSplitDirective
   // DUMP: OMPCountsClause
-  #pragma omp split counts(2, 3)
+  #pragma omp split counts(2, omp_fill)
   // PRINT: for (int i = 0; i < n; ++i)
   // DUMP:      ForStmt
   for (int i = 0; i < n; ++i)
diff --git a/clang/test/OpenMP/split_no_fill_print.c b/clang/test/OpenMP/split_no_fill_print.c
new file mode 100644
index 0000000000000..017b4a27846d5
--- /dev/null
+++ b/clang/test/OpenMP/split_no_fill_print.c
@@ -0,0 +1,34 @@
+/* Simple split + counts without omp_fill: syntax, AST dump, ast-print, IR. */
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s
+// expected-no-diagnostics
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s --check-prefix=LLVM
+
+void body(int);
+
+// PRINT-LABEL: void foo(
+// DUMP-LABEL:  FunctionDecl {{.*}} foo
+void foo(int n) {
+  // PRINT:     #pragma omp split counts(3, 7)
+  // DUMP: OMPSplitDirective
+  // DUMP-NEXT: |-OMPCountsClause
+  // DUMP-NEXT: | |-IntegerLiteral {{.*}} 'int' 3
+  // DUMP-NEXT: | `-IntegerLiteral {{.*}} 'int' 7
+  // DUMP-NEXT: {{.*}}`-ForStmt
+#pragma omp split counts(3, 7)
+  // PRINT: for (int i = 0; i < n; ++i)
+  for (int i = 0; i < n; ++i)
+    body(i);
+}
+
+// LLVM-LABEL: define {{.*}}void @foo(
+// LLVM: .split.iv.0.i
+// LLVM: icmp slt i32 {{.*}}, 3
+// LLVM: call void @body(
+// LLVM: store i32 3, ptr %.split.iv.1.i
+// LLVM: icmp slt i32 {{.*}}, 10
+// LLVM: call void @body(

>From 6a3b23249b2e14a7921bdcce61654c8c4b2a1f0d Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Wed, 25 Mar 2026 03:48:57 -0400
Subject: [PATCH 18/32] ast-unittests

---
 clang/include/clang/ASTMatchers/ASTMatchers.h | 20 ++++++
 clang/lib/ASTMatchers/ASTMatchersInternal.cpp |  4 ++
 clang/lib/ASTMatchers/Dynamic/Registry.cpp    |  2 +
 .../ASTMatchers/ASTMatchersNodeTest.cpp       | 65 +++++++++++++++++++
 clang/unittests/ASTMatchers/ASTMatchersTest.h | 14 ++++
 5 files changed, 105 insertions(+)

diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index e8e7643e0dddd..a3d82baa69819 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -8781,6 +8781,26 @@ extern const internal::VariadicDynCastAllOfMatcher<Stmt,
                                                    OMPTargetUpdateDirective>
     ompTargetUpdateDirective;
 
+/// Matches any ``#pragma omp split`` executable directive.
+///
+/// Given
+///
+/// \code
+///   #pragma omp split counts(2, 3)
+///   for (int i = 0; i < n; ++i) {}
+/// \endcode
+///
+/// ``ompSplitDirective()`` matches the split directive.
+extern const internal::VariadicDynCastAllOfMatcher<Stmt, OMPSplitDirective>
+    ompSplitDirective;
+
+/// Matches OpenMP ``counts`` clause used by ``#pragma omp split``.
+///
+/// Given ``#pragma omp split counts(1, 2, omp_fill)``, ``ompCountsClause()``
+/// matches the ``counts`` clause node.
+extern const internal::VariadicDynCastAllOfMatcher<OMPClause, OMPCountsClause>
+    ompCountsClause;
+
 /// Matches OpenMP ``default`` clause.
 ///
 /// Given
diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
index d6860ca660987..5cbf134620e34 100644
--- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
@@ -1139,6 +1139,10 @@ const internal::VariadicDynCastAllOfMatcher<Stmt, OMPExecutableDirective>
     ompExecutableDirective;
 const internal::VariadicDynCastAllOfMatcher<Stmt, OMPTargetUpdateDirective>
     ompTargetUpdateDirective;
+const internal::VariadicDynCastAllOfMatcher<Stmt, OMPSplitDirective>
+    ompSplitDirective;
+const internal::VariadicDynCastAllOfMatcher<OMPClause, OMPCountsClause>
+    ompCountsClause;
 const internal::VariadicDynCastAllOfMatcher<OMPClause, OMPDefaultClause>
     ompDefaultClause;
 const internal::VariadicDynCastAllOfMatcher<OMPClause, OMPFromClause>
diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
index f31684f93f6f3..a04070971f0eb 100644
--- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -529,10 +529,12 @@ RegistryMaps::RegistryMaps() {
   REGISTER_MATCHER(objcTryStmt);
   REGISTER_MATCHER(ofClass);
   REGISTER_MATCHER(ofKind);
+  REGISTER_MATCHER(ompCountsClause);
   REGISTER_MATCHER(ompDefaultClause);
   REGISTER_MATCHER(ompFromClause);
   REGISTER_MATCHER(ompToClause);
   REGISTER_MATCHER(ompExecutableDirective);
+  REGISTER_MATCHER(ompSplitDirective);
   REGISTER_MATCHER(ompTargetUpdateDirective);
   REGISTER_MATCHER(on);
   REGISTER_MATCHER(onImplicitObjectArgument);
diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
index 7338ff5f302f6..b3b9b5cd5a54b 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
@@ -7,7 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "ASTMatchersTest.h"
+#include "clang/AST/OpenMPClause.h"
 #include "clang/AST/PrettyPrinter.h"
+#include "clang/AST/StmtOpenMP.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Tooling/Tooling.h"
@@ -3103,6 +3105,69 @@ TEST(ASTMatchersTestOpenMP, OMPTargetUpdateDirective_CountExpression) {
   }
 }
 
+// OpenMP 6 split directive / counts clause
+TEST(ASTMatchersTestOpenMP, OMPSplitDirective) {
+  auto Matcher = stmt(ompSplitDirective(hasStructuredBlock(forStmt())));
+
+  StringRef SplitOk = R"(
+void f() {
+#pragma omp split counts(2, 3)
+  for (int i = 0; i < 10; ++i) {}
+}
+)";
+  EXPECT_TRUE(matchesWithOpenMP60(SplitOk, Matcher));
+
+  StringRef ParallelOnly = R"(
+void f() {
+#pragma omp parallel
+  ;
+}
+)";
+  EXPECT_TRUE(notMatchesWithOpenMP60(ParallelOnly, Matcher));
+}
+
+TEST(ASTMatchersTestOpenMP, OMPSplitDirective_HasCountsClause) {
+  auto Matcher = stmt(ompSplitDirective(hasAnyClause(ompCountsClause())));
+
+  StringRef Source0 = R"(
+void f() {
+#pragma omp split counts(2, omp_fill)
+  for (int i = 0; i < 10; ++i) {}
+}
+)";
+  EXPECT_TRUE(matchesWithOpenMP60(Source0, Matcher));
+}
+
+TEST(ASTMatchersTestOpenMP, OMPCountsClause_OmpFillOperand) {
+  StringRef Source0 = R"(
+void f() {
+#pragma omp split counts(1, omp_fill)
+  for (int i = 0; i < 10; ++i) {}
+}
+)";
+  auto AST = tooling::buildASTFromCodeWithArgs(
+      Source0, {"-std=gnu++11", "-target", "i386-unknown-unknown",
+                "-fopenmp=libomp", "-fopenmp-version=60"});
+  ASSERT_TRUE(AST);
+  auto Results = match(ompSplitDirective().bind("split"), AST->getASTContext());
+  ASSERT_EQ(Results.size(), 1u);
+  const auto *Dir = Results[0].getNodeAs<OMPSplitDirective>("split");
+  ASSERT_TRUE(Dir);
+  const OMPCountsClause *Counts = nullptr;
+  for (OMPClause *C : Dir->clauses()) {
+    if ((Counts = dyn_cast<OMPCountsClause>(C)))
+      break;
+  }
+  ASSERT_TRUE(Counts);
+  ASSERT_EQ(Counts->getNumCounts(), 2u);
+  const Expr *FillExpr = Counts->getCountsRefs()[1]->IgnoreParenImpCasts();
+  const auto *DRE = dyn_cast<DeclRefExpr>(FillExpr);
+  ASSERT_TRUE(DRE);
+  const auto *ECD = dyn_cast<EnumConstantDecl>(DRE->getDecl());
+  ASSERT_TRUE(ECD);
+  EXPECT_EQ(ECD->getName(), "omp_fill");
+}
+
 TEST(ASTMatchersTest, Finder_DynamicOnlyAcceptsSomeMatchers) {
   MatchFinder Finder;
   EXPECT_TRUE(Finder.addDynamicMatcher(decl(), nullptr));
diff --git a/clang/unittests/ASTMatchers/ASTMatchersTest.h b/clang/unittests/ASTMatchers/ASTMatchersTest.h
index c1d4daea2c9f1..8a1427d03d60e 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersTest.h
+++ b/clang/unittests/ASTMatchers/ASTMatchersTest.h
@@ -289,6 +289,20 @@ testing::AssertionResult notMatchesWithOpenMP51(const Twine &Code,
                               {"-fopenmp=libomp", "-fopenmp-version=51"});
 }
 
+template <typename T>
+testing::AssertionResult matchesWithOpenMP60(const Twine &Code,
+                                               const T &AMatcher) {
+  return matchesConditionally(Code, AMatcher, true,
+                              {"-fopenmp=libomp", "-fopenmp-version=60"});
+}
+
+template <typename T>
+testing::AssertionResult notMatchesWithOpenMP60(const Twine &Code,
+                                                const T &AMatcher) {
+  return matchesConditionally(Code, AMatcher, false,
+                              {"-fopenmp=libomp", "-fopenmp-version=60"});
+}
+
 template <typename T>
 testing::AssertionResult matchesWithFixedpoint(const std::string &Code,
                                                const T &AMatcher) {

>From b9a14447bb051bc126fbf4cf68bc191cd076b83b Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Wed, 25 Mar 2026 03:53:03 -0400
Subject: [PATCH 19/32] index-test

---
 clang/test/Index/openmp-split.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 clang/test/Index/openmp-split.c

diff --git a/clang/test/Index/openmp-split.c b/clang/test/Index/openmp-split.c
new file mode 100644
index 0000000000000..41ce0e1e87713
--- /dev/null
+++ b/clang/test/Index/openmp-split.c
@@ -0,0 +1,12 @@
+// RUN: c-index-test -test-load-source local %s -fopenmp=libomp -fopenmp-version=60 | FileCheck %s
+
+void test(void) {
+#pragma omp split counts(3, 7)
+  for (int i = 0; i < 20; i += 1)
+    ;
+}
+
+// CHECK: openmp-split.c:4:1: OMPSplitDirective= Extent=[4:1 - 4:31]
+// CHECK: openmp-split.c:4:26: IntegerLiteral= Extent=[4:26 - 4:27]
+// CHECK: openmp-split.c:4:29: IntegerLiteral= Extent=[4:29 - 4:30]
+// CHECK: openmp-split.c:5:3: ForStmt= Extent=[5:3 - 6:6]

>From 7c5af468f72ac52624e939ded9a0584f13ed0ddb Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Wed, 25 Mar 2026 03:54:06 -0400
Subject: [PATCH 20/32] ast-dump

---
 clang/test/AST/ast-dump-openmp-split.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 clang/test/AST/ast-dump-openmp-split.c

diff --git a/clang/test/AST/ast-dump-openmp-split.c b/clang/test/AST/ast-dump-openmp-split.c
new file mode 100644
index 0000000000000..e4eb51becd54a
--- /dev/null
+++ b/clang/test/AST/ast-dump-openmp-split.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=60 -ast-dump %s | FileCheck %s
+//
+// Checks the AST dump of OMPSplitDirective and its OMPCountsClause.
+
+void body(int);
+
+void test(void) {
+#pragma omp split counts(3, 7)
+  for (int i = 0; i < 10; ++i)
+    body(i);
+}
+
+// CHECK: OMPSplitDirective
+// CHECK: OMPCountsClause
+// CHECK: IntegerLiteral{{.*}}3
+// CHECK: IntegerLiteral{{.*}}7
+// CHECK: ForStmt
+// CHECK: <<<NULL>>>
+// CHECK: CallExpr

>From 49d07c07aa451ca835424672a6378d1844729b96 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Wed, 25 Mar 2026 03:55:19 -0400
Subject: [PATCH 21/32] format-test

---
 clang/unittests/ASTMatchers/ASTMatchersTest.h | 2 +-
 clang/unittests/Format/FormatTest.cpp         | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/clang/unittests/ASTMatchers/ASTMatchersTest.h b/clang/unittests/ASTMatchers/ASTMatchersTest.h
index 8a1427d03d60e..932e75360405b 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersTest.h
+++ b/clang/unittests/ASTMatchers/ASTMatchersTest.h
@@ -291,7 +291,7 @@ testing::AssertionResult notMatchesWithOpenMP51(const Twine &Code,
 
 template <typename T>
 testing::AssertionResult matchesWithOpenMP60(const Twine &Code,
-                                               const T &AMatcher) {
+                                             const T &AMatcher) {
   return matchesConditionally(Code, AMatcher, true,
                               {"-fopenmp=libomp", "-fopenmp-version=60"});
 }
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index e0b2644249e76..295f37691a4b3 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -19643,6 +19643,11 @@ TEST_F(FormatTest, UnderstandsPragmaOmpTarget) {
       getLLVMStyleWithColumns(26));
 }
 
+TEST_F(FormatTest, UnderstandsPragmaOmpSplit) {
+  verifyFormat("#pragma omp split counts(2, 3)");
+  verifyFormat("#pragma omp split counts(2, omp_fill)");
+}
+
 TEST_F(FormatTest, UnderstandPragmaOption) {
   verifyFormat("#pragma option -C -A");
 

>From 944ec02cada8bdb01df03f5783529fbe0b592d95 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Fri, 27 Mar 2026 07:27:48 -0400
Subject: [PATCH 22/32] revised

---
 clang/include/clang/AST/OpenMPClause.h        |  21 +-
 clang/include/clang/AST/StmtOpenMP.h          |   5 +-
 clang/include/clang/ASTMatchers/ASTMatchers.h |   2 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |   8 +-
 clang/include/clang/Sema/SemaOpenMP.h         |  12 +-
 clang/lib/AST/OpenMPClause.cpp                |  32 ++-
 clang/lib/Parse/ParseOpenMP.cpp               |  20 +-
 clang/lib/Sema/SemaOpenMP.cpp                 | 245 +++++++-----------
 clang/lib/Sema/TreeTransform.h                |  11 +-
 clang/lib/Serialization/ASTReader.cpp         |   2 +
 clang/lib/Serialization/ASTWriter.cpp         |   2 +
 clang/test/AST/ast-dump-openmp-split.c        |   4 +-
 clang/test/Index/openmp-split.c               |   5 +-
 clang/test/OpenMP/split_counts_verify.c       |   7 +-
 ...split_no_fill_print.c => split_omp_fill.c} |  10 +-
 .../{split_simple_test.c => split_test.c}     |   8 +-
 .../ASTMatchers/ASTMatchersNodeTest.cpp       |  11 +-
 clang/unittests/Format/FormatTest.cpp         |   5 -
 18 files changed, 178 insertions(+), 232 deletions(-)
 rename clang/test/OpenMP/{split_no_fill_print.c => split_omp_fill.c} (81%)
 rename clang/test/OpenMP/{split_simple_test.c => split_test.c} (72%)

diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index dbc22e23c3704..d330f7042fc95 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -39,6 +39,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/TrailingObjects.h"
 #include <cassert>
+#include <climits>
 #include <cstddef>
 #include <iterator>
 #include <utility>
@@ -1026,7 +1027,7 @@ class OMPSizesClause final
 /// This represents the 'counts' clause in the '#pragma omp split' directive.
 ///
 /// \code
-/// #pragma omp split counts(3, 5, 2)
+/// #pragma omp split counts(3, omp_fill, 2)
 /// for (int i = 0; i < n; ++i) { ... }
 /// \endcode
 class OMPCountsClause final
@@ -1041,10 +1042,16 @@ class OMPCountsClause final
   /// Number of count expressions in the clause.
   unsigned NumCounts;
 
+  /// 0-based index of the omp_fill list item, or UINT_MAX if absent.
+  unsigned OmpFillIndex;
+
+  /// Source location of the omp_fill keyword.
+  SourceLocation OmpFillLoc;
+
   /// Build an empty clause.
   explicit OMPCountsClause(int NumCounts)
       : OMPClause(llvm::omp::OMPC_counts, SourceLocation(), SourceLocation()),
-        NumCounts(NumCounts) {}
+        NumCounts(NumCounts), OmpFillIndex(UINT_MAX) {}
 
 public:
   /// Build a 'counts' AST node.
@@ -1056,8 +1063,8 @@ class OMPCountsClause final
   /// \param Counts    Content of the clause.
   static OMPCountsClause *Create(const ASTContext &C, SourceLocation StartLoc,
                                  SourceLocation LParenLoc,
-                                 SourceLocation EndLoc,
-                                 ArrayRef<Expr *> Counts);
+                                 SourceLocation EndLoc, ArrayRef<Expr *> Counts,
+                                 unsigned FillIdx, SourceLocation FillLoc);
 
   /// Build an empty 'counts' AST node for deserialization.
   ///
@@ -1074,6 +1081,12 @@ class OMPCountsClause final
   /// Returns the number of list items.
   unsigned getNumCounts() const { return NumCounts; }
 
+  unsigned getOmpFillIndex() const { return OmpFillIndex; }
+  SourceLocation getOmpFillLoc() const { return OmpFillLoc; }
+  bool hasOmpFill() const { return OmpFillIndex != UINT_MAX; }
+  void setOmpFillIndex(unsigned Idx) { OmpFillIndex = Idx; }
+  void setOmpFillLoc(SourceLocation Loc) { OmpFillLoc = Loc; }
+
   /// Returns the count expressions.
   MutableArrayRef<Expr *> getCountsRefs() {
     return getTrailingObjects(NumCounts);
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 3d8962afa2b7c..dbc76e7df8ecd 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -6068,14 +6068,15 @@ class OMPFuseDirective final
 /// Represents the '#pragma omp split' loop transformation directive.
 ///
 /// \code{.c}
-///   #pragma omp split
+///   #pragma omp split counts(3, omp_fill, 2)
 ///   for (int i = 0; i < n; ++i)
 ///     ...
 /// \endcode
 ///
 /// This directive transforms a single loop into multiple loops based on
 /// index ranges. The transformation splits the iteration space of the loop
-/// into multiple contiguous ranges.
+/// into multiple contiguous ranges. The \c counts clause is required and
+/// exactly one list item must be \c omp_fill.
 class OMPSplitDirective final
     : public OMPCanonicalLoopNestTransformationDirective {
   friend class ASTStmtReader;
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index a3d82baa69819..87b6dbefa7a62 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -8786,7 +8786,7 @@ extern const internal::VariadicDynCastAllOfMatcher<Stmt,
 /// Given
 ///
 /// \code
-///   #pragma omp split counts(2, 3)
+///   #pragma omp split counts(2, omp_fill)
 ///   for (int i = 0; i < n; ++i) {}
 /// \endcode
 ///
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index a8e0308195fc8..4b4c10aedb1d1 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11168,12 +11168,8 @@ def err_omp_bind_required_on_loop : Error<
   "construct">;
 def err_omp_loop_reduction_clause : Error<
   "'reduction' clause not allowed with '#pragma omp loop bind(teams)'">;
-def err_omp_split_counts_multiple_omp_fill : Error<
-  "at most one 'omp_fill' may appear in the 'counts' clause">;
-def err_omp_split_counts_omp_fill_not_last : Error<
-  "'omp_fill' must be the last item in the 'counts' clause">;
-def err_omp_split_counts_omp_fill_no_trip : Error<
-  "'omp_fill' requires a computable loop iteration count">;
+def err_omp_split_counts_not_one_omp_fill : Error<
+  "exactly one 'omp_fill' must appear in the 'counts' clause">;
 def warn_break_binds_to_switch : Warning<
   "'break' is bound to loop, GCC binds it to switch">,
   InGroup<GccCompat>;
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index 11df5816d2b35..38969abd786b3 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -921,9 +921,9 @@ class SemaOpenMP : public SemaBase {
   OMPClause *ActOnOpenMPCountsClause(ArrayRef<Expr *> CountExprs,
                                      SourceLocation StartLoc,
                                      SourceLocation LParenLoc,
-                                     SourceLocation EndLoc);
-  /// Build the OpenMP \c omp_fill placeholder for a \c counts clause.
-  ExprResult ActOnOpenMPCountsFillExpr(SourceLocation Loc);
+                                     SourceLocation EndLoc, unsigned FillIdx,
+                                     SourceLocation FillLoc,
+                                     unsigned FillCount);
   /// Called on well-form 'permutation' clause after parsing its arguments.
   OMPClause *ActOnOpenMPPermutationClause(ArrayRef<Expr *> PermExprs,
                                           SourceLocation StartLoc,
@@ -1649,12 +1649,6 @@ class SemaOpenMP : public SemaBase {
 
   /// Device number identifier specified by the context selector.
   StringRef DeviceNumID;
-
-  /// Implicit enumerator used to represent \c omp_fill in \c counts clauses.
-  EnumConstantDecl *OMPFillCountMarker = nullptr;
-
-  EnumConstantDecl *getOrCreateOMPFillCountMarker();
-  bool isOMPFillCountExpr(const Expr *E) const;
 };
 
 } // namespace clang
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index d68a53189ee65..607f73d41f98f 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -987,16 +987,18 @@ OMPSizesClause *OMPSizesClause::CreateEmpty(const ASTContext &C,
   return new (Mem) OMPSizesClause(NumSizes);
 }
 
-OMPCountsClause *OMPCountsClause::Create(const ASTContext &C,
-                                         SourceLocation StartLoc,
-                                         SourceLocation LParenLoc,
-                                         SourceLocation EndLoc,
-                                         ArrayRef<Expr *> Counts) {
+OMPCountsClause *
+OMPCountsClause::Create(const ASTContext &C, SourceLocation StartLoc,
+                        SourceLocation LParenLoc, SourceLocation EndLoc,
+                        ArrayRef<Expr *> Counts, unsigned FillIdx,
+                        SourceLocation FillLoc) {
   OMPCountsClause *Clause = CreateEmpty(C, Counts.size());
   Clause->setLocStart(StartLoc);
   Clause->setLParenLoc(LParenLoc);
   Clause->setLocEnd(EndLoc);
   Clause->setCountsRefs(Counts);
+  Clause->setOmpFillIndex(FillIdx);
+  Clause->setOmpFillLoc(FillLoc);
   return Clause;
 }
 
@@ -2006,15 +2008,17 @@ void OMPClausePrinter::VisitOMPSizesClause(OMPSizesClause *Node) {
 
 void OMPClausePrinter::VisitOMPCountsClause(OMPCountsClause *Node) {
   OS << "counts(";
-  llvm::interleaveComma(Node->getCountsRefs(), OS, [&](const Expr *E) {
-    if (const auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts()))
-      if (const auto *ECD = dyn_cast<EnumConstantDecl>(DRE->getDecl()))
-        if (ECD->isImplicit() && ECD->getName() == "omp_fill") {
-          OS << "omp_fill";
-          return;
-        }
-    E->printPretty(OS, nullptr, Policy, 0);
-  });
+  unsigned FillIdx = Node->getOmpFillIndex();
+  ArrayRef<Expr *> Refs = Node->getCountsRefs();
+  for (unsigned I = 0, N = Refs.size(); I < N; ++I) {
+    if (I)
+      OS << ", ";
+    if (I == FillIdx) {
+      OS << "omp_fill";
+    } else {
+      Refs[I]->printPretty(OS, nullptr, Policy, 0);
+    }
+  }
   OS << ")";
 }
 
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index b5864baea600d..d529139a1907c 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Frontend/OpenMP/DirectiveNameParser.h"
 #include "llvm/Frontend/OpenMP/OMPAssume.h"
 #include "llvm/Frontend/OpenMP/OMPContext.h"
+#include <climits>
 #include <optional>
 
 using namespace clang;
@@ -2993,6 +2994,9 @@ OMPClause *Parser::ParseOpenMPSizesClause() {
 OMPClause *Parser::ParseOpenMPCountsClause() {
   SourceLocation ClauseNameLoc, OpenLoc, CloseLoc;
   SmallVector<Expr *, 4> ValExprs;
+  unsigned FillIdx = UINT_MAX;
+  unsigned FillCount = 0;
+  SourceLocation FillLoc;
 
   assert(getOpenMPClauseName(OMPC_counts) == PP.getSpelling(Tok) &&
          "Expected parsing to start at clause name");
@@ -3007,14 +3011,12 @@ OMPClause *Parser::ParseOpenMPCountsClause() {
   do {
     if (Tok.is(tok::identifier) &&
         Tok.getIdentifierInfo()->getName() == "omp_fill") {
-      SourceLocation FillLoc = Tok.getLocation();
+      if (FillCount == 0)
+        FillIdx = ValExprs.size();
+      ++FillCount;
+      FillLoc = Tok.getLocation();
       ConsumeToken();
-      ExprResult ER = Actions.OpenMP().ActOnOpenMPCountsFillExpr(FillLoc);
-      if (!ER.isUsable()) {
-        T.skipToEnd();
-        return nullptr;
-      }
-      ValExprs.push_back(ER.get());
+      ValExprs.push_back(nullptr);
     } else {
       ExprResult Val = ParseConstantExpression();
       if (!Val.isUsable()) {
@@ -3030,8 +3032,8 @@ OMPClause *Parser::ParseOpenMPCountsClause() {
   OpenLoc = T.getOpenLocation();
   CloseLoc = T.getCloseLocation();
 
-  return Actions.OpenMP().ActOnOpenMPCountsClause(ValExprs, ClauseNameLoc,
-                                                  OpenLoc, CloseLoc);
+  return Actions.OpenMP().ActOnOpenMPCountsClause(
+      ValExprs, ClauseNameLoc, OpenLoc, CloseLoc, FillIdx, FillLoc, FillCount);
 }
 
 OMPClause *Parser::ParseOpenMPLoopRangeClause() {
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index f20f8ae68cedb..98a57dd791caf 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -15980,78 +15980,99 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
   // Internal variable names.
   std::string OrigVarName = OrigVar->getNameInfo().getAsString();
 
-  enum class SplitCountKind { Constant, Fill };
-  SmallVector<std::pair<SplitCountKind, uint64_t>, 4> Entries;
-  for (Expr *CountExpr : CountsClause->getCountsRefs()) {
+  unsigned FillIdx = CountsClause->getOmpFillIndex();
+  if (!CountsClause->hasOmpFill()) {
+    return StmtError();
+  }
+
+  unsigned NumItems = CountsClause->getNumCounts();
+  SmallVector<uint64_t, 4> CountValues(NumItems, 0);
+  ArrayRef<Expr *> Refs = CountsClause->getCountsRefs();
+  for (unsigned I = 0; I < NumItems; ++I) {
+    if (I == FillIdx)
+      continue;
+    Expr *CountExpr = Refs[I];
     if (!CountExpr)
       return OMPSplitDirective::Create(Context, StartLoc, EndLoc, Clauses,
                                        NumLoops, AStmt, nullptr, nullptr);
-    if (isOMPFillCountExpr(CountExpr)) {
-      Entries.push_back({SplitCountKind::Fill, 0});
-      continue;
-    }
     std::optional<llvm::APSInt> OptVal =
         CountExpr->getIntegerConstantExpr(Context);
     if (!OptVal || OptVal->isNegative())
       return OMPSplitDirective::Create(Context, StartLoc, EndLoc, Clauses,
                                        NumLoops, AStmt, nullptr, nullptr);
-    Entries.push_back({SplitCountKind::Constant, OptVal->getZExtValue()});
-  }
-
-  if (Entries.empty())
-    return StmtError();
-
-  unsigned NumFill = 0;
-  unsigned FillPos = 0;
-  for (unsigned I = 0; I < Entries.size(); ++I) {
-    if (Entries[I].first == SplitCountKind::Fill) {
-      ++NumFill;
-      FillPos = I;
-    }
-  }
-  if (NumFill > 1) {
-    Diag(CountsClause->getBeginLoc(),
-         diag::err_omp_split_counts_multiple_omp_fill);
-    return StmtError();
-  }
-  if (NumFill == 1 && FillPos != Entries.size() - 1) {
-    Diag(CountsClause->getBeginLoc(),
-         diag::err_omp_split_counts_omp_fill_not_last);
-    return StmtError();
+    CountValues[I] = OptVal->getZExtValue();
   }
 
   Expr *NumIterExpr = LoopHelper.NumIterations;
-  if (NumFill == 1 && !NumIterExpr) {
-    Diag(CountsClause->getBeginLoc(),
-         diag::err_omp_split_counts_omp_fill_no_trip);
-    return StmtError();
-  }
 
-  struct SplitSeg {
-    uint64_t Start;
-    bool EndIsTripCount;
-    uint64_t EndConst;
+  uint64_t RightSum = 0;
+  for (unsigned I = FillIdx + 1; I < NumItems; ++I)
+    RightSum += CountValues[I];
+
+  auto MakeIntLit = [&](uint64_t Val) {
+    return IntegerLiteral::Create(Context, llvm::APInt(IVWidth, Val), IVTy,
+                                  OrigVarLoc);
   };
-  SmallVector<SplitSeg, 4> Segs;
-  uint64_t Cur = 0;
-  for (unsigned I = 0; I < Entries.size(); ++I) {
-    const auto &Ent = Entries[I];
-    if (Ent.first == SplitCountKind::Constant) {
-      uint64_t Nxt = Cur + Ent.second;
-      Segs.push_back({Cur, false, Nxt});
-      Cur = Nxt;
-    } else
-      Segs.push_back({Cur, true, 0});
-  }
 
-  size_t NumSegments = Segs.size();
+  size_t NumSegments = NumItems;
   SmallVector<Stmt *, 4> SplitLoops;
 
+  uint64_t LeftAccum = 0;
+  uint64_t RightRemaining = RightSum;
+
   for (size_t Seg = 0; Seg < NumSegments; ++Seg) {
-    uint64_t StartVal = Segs[Seg].Start;
+    Expr *StartExpr = nullptr;
+    Expr *EndExpr = nullptr;
+
+    if (Seg < FillIdx) {
+      StartExpr = MakeIntLit(LeftAccum);
+      LeftAccum += CountValues[Seg];
+      EndExpr = MakeIntLit(LeftAccum);
+    } else if (Seg == FillIdx) {
+      StartExpr = MakeIntLit(LeftAccum);
+      if (RightRemaining == 0) {
+        EndExpr = NumIterExpr;
+      } else {
+        ExprResult Sub =
+            SemaRef.BuildBinOp(CurScope, OrigVarLoc, BO_Sub, NumIterExpr,
+                               MakeIntLit(RightRemaining));
+        if (!Sub.isUsable())
+          return StmtError();
+        EndExpr = Sub.get();
+      }
+    } else {
+      if (RightRemaining == RightSum) {
+        if (RightSum == 0)
+          StartExpr = NumIterExpr;
+        else {
+          ExprResult Sub =
+              SemaRef.BuildBinOp(CurScope, OrigVarLoc, BO_Sub, NumIterExpr,
+                                 MakeIntLit(RightRemaining));
+          if (!Sub.isUsable())
+            return StmtError();
+          StartExpr = Sub.get();
+        }
+      } else {
+        ExprResult Sub =
+            SemaRef.BuildBinOp(CurScope, OrigVarLoc, BO_Sub, NumIterExpr,
+                               MakeIntLit(RightRemaining));
+        if (!Sub.isUsable())
+          return StmtError();
+        StartExpr = Sub.get();
+      }
+      RightRemaining -= CountValues[Seg];
+      if (RightRemaining == 0)
+        EndExpr = NumIterExpr;
+      else {
+        ExprResult Sub =
+            SemaRef.BuildBinOp(CurScope, OrigVarLoc, BO_Sub, NumIterExpr,
+                               MakeIntLit(RightRemaining));
+        if (!Sub.isUsable())
+          return StmtError();
+        EndExpr = Sub.get();
+      }
+    }
 
-    // Segment IV: .split.iv.<Seg>.<OrigVarName>, init to StartVal, bound by
-    // EndVal.
     SmallString<64> IVName(".split.iv.");
     IVName += (Twine(Seg) + "." + OrigVarName).str();
     VarDecl *IVDecl = buildVarDecl(SemaRef, {}, IVTy, IVName, nullptr, OrigVar);
@@ -16059,25 +16080,14 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
       return buildDeclRefExpr(SemaRef, IVDecl, IVTy, OrigVarLoc);
     };
 
-    llvm::APInt StartAP(IVWidth, StartVal, /*isSigned=*/false);
-    auto *StartLit = IntegerLiteral::Create(Context, StartAP, IVTy, OrigVarLoc);
-
-    Expr *EndBound = nullptr;
-    if (Segs[Seg].EndIsTripCount)
-      EndBound = NumIterExpr;
-    else {
-      llvm::APInt EndAP(IVWidth, Segs[Seg].EndConst, /*isSigned=*/false);
-      EndBound = IntegerLiteral::Create(Context, EndAP, IVTy, OrigVarLoc);
-    }
-
-    SemaRef.AddInitializerToDecl(IVDecl, StartLit, /*DirectInit=*/false);
+    SemaRef.AddInitializerToDecl(IVDecl, StartExpr, /*DirectInit=*/false);
     StmtResult InitStmt = new (Context)
         DeclStmt(DeclGroupRef(IVDecl), OrigVarLocBegin, OrigVarLocEnd);
     if (!InitStmt.isUsable())
       return StmtError();
 
     ExprResult CondExpr = SemaRef.BuildBinOp(
-        CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeIVRef(), EndBound);
+        CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeIVRef(), EndExpr);
     if (!CondExpr.isUsable())
       return StmtError();
 
@@ -16086,7 +16096,6 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
     if (!IncrExpr.isUsable())
       return StmtError();
 
-    // orig_var = IV so the original body sees the same variable.
     ExprResult UpdateExpr = SemaRef.BuildBinOp(CurScope, OrigVarLoc, BO_Assign,
                                                OrigVar, MakeIVRef());
     if (!UpdateExpr.isUsable())
@@ -16106,10 +16115,9 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
         ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr,
                 IncrExpr.get(), LoopBody, LoopHelper.Init->getBeginLoc(),
                 LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc());
-    // Push the splitted for loops into SplitLoops
     SplitLoops.push_back(For);
   }
-  // Combine all the loops into a compound statement
+
   auto *SplitStmt = CompoundStmt::Create(
       Context, SplitLoops, FPOptionsOverride(),
       SplitLoops.front()->getBeginLoc(), SplitLoops.back()->getEndLoc());
@@ -18067,91 +18075,17 @@ OMPClause *SemaOpenMP::ActOnOpenMPSizesClause(ArrayRef<Expr *> SizeExprs,
                                 SanitizedSizeExprs);
 }
 
-EnumConstantDecl *SemaOpenMP::getOrCreateOMPFillCountMarker() {
-  if (OMPFillCountMarker)
-    return OMPFillCountMarker;
-
-  ASTContext &Ctx = getASTContext();
-  TranslationUnitDecl *TU = Ctx.getTranslationUnitDecl();
-  Preprocessor &PP = SemaRef.PP;
-  IdentifierInfo *EnumII =
-      &PP.getIdentifierTable().get("__clang_omp_counts_fill_tag");
-  EnumDecl *ED = EnumDecl::Create(
-      Ctx, TU, SourceLocation{}, SourceLocation{}, EnumII, /*PrevDecl=*/nullptr,
-      /*IsScoped=*/false, /*IsScopedUsingClassTag=*/false, /*IsFixed=*/false);
-  ED->setImplicit(true);
-
-  QualType IntTy = Ctx.IntTy;
-  ED->setIntegerType(IntTy);
-  ED->setPromotionType(IntTy);
-
-  IdentifierInfo *FillII = &PP.getIdentifierTable().get("omp_fill");
-  llvm::APSInt Zero(Ctx.getIntWidth(IntTy),
-                    /*isUnsigned=*/!IntTy->isSignedIntegerType());
-  Zero.setIsSigned(IntTy->isSignedIntegerType());
-  IntegerLiteral *IL =
-      IntegerLiteral::Create(Ctx, Zero, IntTy, SourceLocation{});
-  EnumConstantDecl *ECD = EnumConstantDecl::Create(Ctx, ED, SourceLocation{},
-                                                   FillII, IntTy, IL, Zero);
-  ECD->setImplicit(true);
-
-  ED->addDecl(ECD);
-
-  llvm::SmallVector<Decl *, 1> Elements;
-  Elements.push_back(ECD);
-  unsigned NumNegativeBits = 0;
-  unsigned NumPositiveBits = 0;
-  Ctx.computeEnumBits(Elements, NumNegativeBits, NumPositiveBits);
-
-  ED->completeDefinition(IntTy, IntTy, NumPositiveBits, NumNegativeBits);
-
-  if (!SemaRef.getLangOpts().CPlusPlus)
-    ECD->setType(IntTy);
-  else {
-    QualType EnumTy = Ctx.getTagType(ElaboratedTypeKeyword::None,
-                                     /*Qualifier=*/std::nullopt, ED,
-                                     /*OwnsTag=*/false);
-    ECD->setType(EnumTy);
-  }
-
-  TU->addDecl(ED);
-
-  OMPFillCountMarker = ECD;
-  return ECD;
-}
-
-ExprResult SemaOpenMP::ActOnOpenMPCountsFillExpr(SourceLocation Loc) {
-  EnumConstantDecl *ECD = getOrCreateOMPFillCountMarker();
-  ASTContext &Ctx = getASTContext();
-  QualType T = ECD->getType();
-  return DeclRefExpr::Create(Ctx, NestedNameSpecifierLoc(), SourceLocation(),
-                             ECD, /*RefersToEnclosingVariableOrCapture=*/false,
-                             Loc, T, VK_PRValue, ECD);
-}
-
-bool SemaOpenMP::isOMPFillCountExpr(const Expr *E) const {
-  if (!E)
-    return false;
-  E = E->IgnoreParenImpCasts();
-  const auto *DRE = dyn_cast<DeclRefExpr>(E);
-  if (!DRE)
-    return false;
-  if (OMPFillCountMarker)
-    return DRE->getDecl() == OMPFillCountMarker;
-  const auto *ECD = dyn_cast<EnumConstantDecl>(DRE->getDecl());
-  return ECD && ECD->isImplicit() && ECD->getName() == "omp_fill";
-}
-
-OMPClause *SemaOpenMP::ActOnOpenMPCountsClause(ArrayRef<Expr *> CountExprs,
-                                               SourceLocation StartLoc,
-                                               SourceLocation LParenLoc,
-                                               SourceLocation EndLoc) {
+OMPClause *SemaOpenMP::ActOnOpenMPCountsClause(
+    ArrayRef<Expr *> CountExprs, SourceLocation StartLoc,
+    SourceLocation LParenLoc, SourceLocation EndLoc, unsigned FillIdx,
+    SourceLocation FillLoc, unsigned FillCount) {
   SmallVector<Expr *> SanitizedCountExprs(CountExprs);
 
-  for (Expr *&CountExpr : SanitizedCountExprs) {
-    if (!CountExpr)
+  for (unsigned I = 0; I < SanitizedCountExprs.size(); ++I) {
+    Expr *&CountExpr = SanitizedCountExprs[I];
+    if (I == FillIdx)
       continue;
-    if (isOMPFillCountExpr(CountExpr))
+    if (!CountExpr)
       continue;
 
     bool IsValid = isNonNegativeIntegerValue(CountExpr, SemaRef, OMPC_counts,
@@ -18165,8 +18099,13 @@ OMPClause *SemaOpenMP::ActOnOpenMPCountsClause(ArrayRef<Expr *> CountExprs,
       CountExpr = nullptr;
   }
 
+  if (FillCount != 1) {
+    Diag(FillCount == 0 ? StartLoc : FillLoc,
+         diag::err_omp_split_counts_not_one_omp_fill);
+  }
+
   return OMPCountsClause::Create(getASTContext(), StartLoc, LParenLoc, EndLoc,
-                                 SanitizedCountExprs);
+                                 SanitizedCountExprs, FillIdx, FillLoc);
 }
 
 OMPClause *SemaOpenMP::ActOnOpenMPPermutationClause(ArrayRef<Expr *> PermExprs,
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index bc78f64f0e095..1192a207b56bd 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1772,9 +1772,11 @@ class TreeTransform {
   OMPClause *RebuildOMPCountsClause(ArrayRef<Expr *> Counts,
                                     SourceLocation StartLoc,
                                     SourceLocation LParenLoc,
-                                    SourceLocation EndLoc) {
-    return getSema().OpenMP().ActOnOpenMPCountsClause(Counts, StartLoc,
-                                                      LParenLoc, EndLoc);
+                                    SourceLocation EndLoc, unsigned FillIdx,
+                                    SourceLocation FillLoc) {
+    unsigned FillCount = (FillIdx != UINT_MAX) ? 1 : 0;
+    return getSema().OpenMP().ActOnOpenMPCountsClause(
+        Counts, StartLoc, LParenLoc, EndLoc, FillIdx, FillLoc, FillCount);
   }
 
   /// Build a new OpenMP 'permutation' clause.
@@ -10652,7 +10654,8 @@ TreeTransform<Derived>::TransformOMPCountsClause(OMPCountsClause *C) {
   }
 
   return RebuildOMPCountsClause(TransformedCounts, C->getBeginLoc(),
-                                C->getLParenLoc(), C->getEndLoc());
+                                C->getLParenLoc(), C->getEndLoc(),
+                                C->getOmpFillIndex(), C->getOmpFillLoc());
 }
 
 template <typename Derived>
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 10f5e106b9d97..c456474b66926 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -11856,6 +11856,8 @@ void OMPClauseReader::VisitOMPSizesClause(OMPSizesClause *C) {
 }
 
 void OMPClauseReader::VisitOMPCountsClause(OMPCountsClause *C) {
+  C->setOmpFillIndex(Record.readInt());
+  C->setOmpFillLoc(Record.readSourceLocation());
   for (Expr *&E : C->getCountsRefs())
     E = Record.readSubExpr();
   C->setLParenLoc(Record.readSourceLocation());
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index e10e402b1cbf8..42e054bec7e18 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -8072,6 +8072,8 @@ void OMPClauseWriter::VisitOMPSizesClause(OMPSizesClause *C) {
 
 void OMPClauseWriter::VisitOMPCountsClause(OMPCountsClause *C) {
   Record.push_back(C->getNumCounts());
+  Record.push_back(C->getOmpFillIndex());
+  Record.AddSourceLocation(C->getOmpFillLoc());
   for (Expr *Count : C->getCountsRefs())
     Record.AddStmt(Count);
   Record.AddSourceLocation(C->getLParenLoc());
diff --git a/clang/test/AST/ast-dump-openmp-split.c b/clang/test/AST/ast-dump-openmp-split.c
index e4eb51becd54a..821badae55e66 100644
--- a/clang/test/AST/ast-dump-openmp-split.c
+++ b/clang/test/AST/ast-dump-openmp-split.c
@@ -5,7 +5,7 @@
 void body(int);
 
 void test(void) {
-#pragma omp split counts(3, 7)
+#pragma omp split counts(3, omp_fill)
   for (int i = 0; i < 10; ++i)
     body(i);
 }
@@ -13,7 +13,7 @@ void test(void) {
 // CHECK: OMPSplitDirective
 // CHECK: OMPCountsClause
 // CHECK: IntegerLiteral{{.*}}3
-// CHECK: IntegerLiteral{{.*}}7
+// CHECK: <<<NULL>>>
 // CHECK: ForStmt
 // CHECK: <<<NULL>>>
 // CHECK: CallExpr
diff --git a/clang/test/Index/openmp-split.c b/clang/test/Index/openmp-split.c
index 41ce0e1e87713..0c63f12297930 100644
--- a/clang/test/Index/openmp-split.c
+++ b/clang/test/Index/openmp-split.c
@@ -1,12 +1,11 @@
 // RUN: c-index-test -test-load-source local %s -fopenmp=libomp -fopenmp-version=60 | FileCheck %s
 
 void test(void) {
-#pragma omp split counts(3, 7)
+#pragma omp split counts(3, omp_fill)
   for (int i = 0; i < 20; i += 1)
     ;
 }
 
-// CHECK: openmp-split.c:4:1: OMPSplitDirective= Extent=[4:1 - 4:31]
+// CHECK: openmp-split.c:4:1: OMPSplitDirective= Extent=[4:1 - 4:38]
 // CHECK: openmp-split.c:4:26: IntegerLiteral= Extent=[4:26 - 4:27]
-// CHECK: openmp-split.c:4:29: IntegerLiteral= Extent=[4:29 - 4:30]
 // CHECK: openmp-split.c:5:3: ForStmt= Extent=[5:3 - 6:6]
diff --git a/clang/test/OpenMP/split_counts_verify.c b/clang/test/OpenMP/split_counts_verify.c
index 3eec02f974e07..6c19999045698 100644
--- a/clang/test/OpenMP/split_counts_verify.c
+++ b/clang/test/OpenMP/split_counts_verify.c
@@ -1,6 +1,6 @@
 /*
  * Verify #pragma omp split counts(c1, c2, ...) at AST, IR, and runtime.
- * counts(3, 5, 2) splits 10 iterations into: [0..3), [3..8), [8..10).
+ * counts(3, omp_fill, 2) with n=10 splits into: [0..3), [3..8), [8..10).
  * Sum 0+1+...+9 = 45.
  */
 // REQUIRES: x86-registered-target
@@ -12,7 +12,7 @@
 // 2) AST dump should show OMPSplitDirective with OMPCountsClause node.
 // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-dump %s 2>&1 | FileCheck %s --check-prefix=AST
 
-// 3) Emit LLVM: three sequential loops (multiple phi/br for loop structure)
+// 3) Emit LLVM: three sequential loops
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=60 -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=IR
 
 
@@ -20,7 +20,7 @@ int main(void) {
   const int n = 10;
   int sum = 0;
 
-#pragma omp split counts(3, 5, 2)
+#pragma omp split counts(3, omp_fill, 2)
   for (int i = 0; i < n; ++i) {
     sum += i;
   }
@@ -38,4 +38,3 @@ int main(void) {
 // IR: icmp slt i32 {{.*}}, 8
 // IR: .split.iv.2
 // IR: icmp slt i32 {{.*}}, 10
-// IR: icmp eq i32 {{.*}}, 45
diff --git a/clang/test/OpenMP/split_no_fill_print.c b/clang/test/OpenMP/split_omp_fill.c
similarity index 81%
rename from clang/test/OpenMP/split_no_fill_print.c
rename to clang/test/OpenMP/split_omp_fill.c
index 017b4a27846d5..075985bd6ec82 100644
--- a/clang/test/OpenMP/split_no_fill_print.c
+++ b/clang/test/OpenMP/split_omp_fill.c
@@ -1,4 +1,4 @@
-/* Simple split + counts without omp_fill: syntax, AST dump, ast-print, IR. */
+/* Split + counts with omp_fill: syntax, AST dump, ast-print, IR. */
 // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s
 // expected-no-diagnostics
 //
@@ -13,13 +13,13 @@ void body(int);
 // PRINT-LABEL: void foo(
 // DUMP-LABEL:  FunctionDecl {{.*}} foo
 void foo(int n) {
-  // PRINT:     #pragma omp split counts(3, 7)
+  // PRINT:     #pragma omp split counts(3, omp_fill)
   // DUMP: OMPSplitDirective
   // DUMP-NEXT: |-OMPCountsClause
   // DUMP-NEXT: | |-IntegerLiteral {{.*}} 'int' 3
-  // DUMP-NEXT: | `-IntegerLiteral {{.*}} 'int' 7
+  // DUMP-NEXT: | `-{{.*}}
   // DUMP-NEXT: {{.*}}`-ForStmt
-#pragma omp split counts(3, 7)
+#pragma omp split counts(3, omp_fill)
   // PRINT: for (int i = 0; i < n; ++i)
   for (int i = 0; i < n; ++i)
     body(i);
@@ -30,5 +30,5 @@ void foo(int n) {
 // LLVM: icmp slt i32 {{.*}}, 3
 // LLVM: call void @body(
 // LLVM: store i32 3, ptr %.split.iv.1.i
-// LLVM: icmp slt i32 {{.*}}, 10
+// LLVM: icmp slt i32 {{.*}}, %{{.*}}
 // LLVM: call void @body(
diff --git a/clang/test/OpenMP/split_simple_test.c b/clang/test/OpenMP/split_test.c
similarity index 72%
rename from clang/test/OpenMP/split_simple_test.c
rename to clang/test/OpenMP/split_test.c
index 021759e6a70e6..af9686ce1f729 100644
--- a/clang/test/OpenMP/split_simple_test.c
+++ b/clang/test/OpenMP/split_test.c
@@ -1,6 +1,6 @@
 /*
  * Simple test for #pragma omp split counts: one for-loop is transformed
- * into two loops (counts(5, 5) => [0..5) and [5..10)).
+ * into two loops (counts(5, omp_fill) with n=10 => [0..5) and [5..10)).
  */
 // Verify the split directive compiles and emits IR (two sequential loops).
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
@@ -9,7 +9,7 @@ int main(void) {
   const int n = 10;
   int sum = 0;
 
-#pragma omp split counts(5, 5)
+#pragma omp split counts(5, omp_fill)
   for (int i = 0; i < n; ++i) {
     sum += i;
   }
@@ -19,9 +19,9 @@ int main(void) {
 
 // CHECK: define
 // CHECK: load
-// Split produces two sequential loops (counts(5, 5) => bounds 5, 10).
+// Split produces two sequential loops (counts(5, omp_fill) with n=10).
 // CHECK: .split.iv
 // CHECK: icmp slt i32 {{.*}}, 5
 // CHECK: .split.iv
-// CHECK: icmp slt i32 {{.*}}, 10
+// CHECK: icmp slt
 // CHECK: br i1
diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
index b3b9b5cd5a54b..f725784361bf6 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
@@ -3111,7 +3111,7 @@ TEST(ASTMatchersTestOpenMP, OMPSplitDirective) {
 
   StringRef SplitOk = R"(
 void f() {
-#pragma omp split counts(2, 3)
+#pragma omp split counts(2, omp_fill)
   for (int i = 0; i < 10; ++i) {}
 }
 )";
@@ -3160,12 +3160,9 @@ void f() {
   }
   ASSERT_TRUE(Counts);
   ASSERT_EQ(Counts->getNumCounts(), 2u);
-  const Expr *FillExpr = Counts->getCountsRefs()[1]->IgnoreParenImpCasts();
-  const auto *DRE = dyn_cast<DeclRefExpr>(FillExpr);
-  ASSERT_TRUE(DRE);
-  const auto *ECD = dyn_cast<EnumConstantDecl>(DRE->getDecl());
-  ASSERT_TRUE(ECD);
-  EXPECT_EQ(ECD->getName(), "omp_fill");
+  EXPECT_TRUE(Counts->hasOmpFill());
+  EXPECT_EQ(Counts->getOmpFillIndex(), 1u);
+  EXPECT_FALSE(Counts->getCountsRefs()[1]);
 }
 
 TEST(ASTMatchersTest, Finder_DynamicOnlyAcceptsSomeMatchers) {
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 295f37691a4b3..e0b2644249e76 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -19643,11 +19643,6 @@ TEST_F(FormatTest, UnderstandsPragmaOmpTarget) {
       getLLVMStyleWithColumns(26));
 }
 
-TEST_F(FormatTest, UnderstandsPragmaOmpSplit) {
-  verifyFormat("#pragma omp split counts(2, 3)");
-  verifyFormat("#pragma omp split counts(2, omp_fill)");
-}
-
 TEST_F(FormatTest, UnderstandPragmaOption) {
   verifyFormat("#pragma option -C -A");
 

>From cdfc826a573dfc23e3014e372932c1c606acbf51 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Tue, 31 Mar 2026 06:07:13 -0400
Subject: [PATCH 23/32] non-zero-start

---
 clang/lib/Sema/SemaOpenMP.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 98a57dd791caf..b602187309989 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -16096,9 +16096,9 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
     if (!IncrExpr.isUsable())
       return StmtError();
 
-    ExprResult UpdateExpr = SemaRef.BuildBinOp(CurScope, OrigVarLoc, BO_Assign,
-                                               OrigVar, MakeIVRef());
-    if (!UpdateExpr.isUsable())
+    ExprResult IVAssign = SemaRef.BuildBinOp(
+        CurScope, OrigVarLoc, BO_Assign, IterationVarRef, MakeIVRef());
+    if (!IVAssign.isUsable())
       return StmtError();
 
     SmallVector<Stmt *, 4> BodyStmts;

>From 25bef899e3b3e65907755967f0d45920fe9ced88 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Tue, 31 Mar 2026 06:08:19 -0400
Subject: [PATCH 24/32] codegen

---
 clang/test/OpenMP/split_codegen.cpp | 1964 +++++++++++++++++++++++++++
 clang/test/OpenMP/split_test.c      |   27 -
 2 files changed, 1964 insertions(+), 27 deletions(-)
 create mode 100644 clang/test/OpenMP/split_codegen.cpp
 delete mode 100644 clang/test/OpenMP/split_test.c

diff --git a/clang/test/OpenMP/split_codegen.cpp b/clang/test/OpenMP/split_codegen.cpp
new file mode 100644
index 0000000000000..44eb95fbe0a9f
--- /dev/null
+++ b/clang/test/OpenMP/split_codegen.cpp
@@ -0,0 +1,1964 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 4
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest -std=c++20 -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest -std=c++20 -fopenmp -fopenmp-version=60 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest -std=c++20 -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+extern "C" void body(...) {} // Empty variadic callee used by every loop body.
+
+struct S { // Split applied to a loop whose counter is the data member 'i'.
+  int i;
+  S() {
+#pragma omp split counts(5, omp_fill)
+    for (i = 0; i < 20; i++) // Checks expect IV ranges [0,5) then [5,20).
+      body(i);
+  }
+} s; // Global instance: the constructor is reached from the global initializer.
+
+extern "C" void split_two_const_trip() { // Constant loop bound (10).
+#pragma omp split counts(3, omp_fill)
+  for (int i = 0; i < 10; ++i) // Checks expect IV ranges [0,3) then [3,10).
+    body(i); // NOTE(review): checks store the IV to a global (@_ZZ...7.omp.iv),
+} // not to an alloca - confirm this is intended.
+
+extern "C" void split_var_trip(int n) { // Loop bound is the run-time value n.
+#pragma omp split counts(3, omp_fill)
+  for (int i = 0; i < n; ++i) // Checks expect IV ranges [0,3) then [3,n).
+    body(i); // NOTE(review): first bound is the constant 3, not clamped to n;
+} // presumably n >= 3 is required - confirm against the spec.
+
+extern "C" void split_mid_fill(int n) { // omp_fill between two fixed counts.
+#pragma omp split counts(2, omp_fill, 3)
+  for (int i = 0; i < n; ++i) // Checks expect [0,2), [2,n-3), [n-3,n).
+    body(i);
+}
+
+extern "C" void split_first_fill(int n) { // omp_fill is the first entry.
+#pragma omp split counts(omp_fill, 4)
+  for (int i = 0; i < n; ++i) // Checks expect [0,n-4) then [n-4,n).
+    body(i);
+}
+
+extern "C" void split_only_fill(int n) { // omp_fill is the only entry.
+#pragma omp split counts(omp_fill)
+  for (int i = 0; i < n; ++i) // Checks expect a single loop over [0,n).
+    body(i);
+}
+
+extern "C" void split_neg_start(int n) { // Negative start, inclusive bound.
+#pragma omp split counts(1, omp_fill, 1)
+  for (int i = -1; i <= n; ++i) // Checks: trip count tc = n + 2, i = -1 + iv,
+    body(i); // unsigned IV math; ranges [0,1), [1,tc-1), [tc-1,tc).
+}
+
+extern "C" void split_zero_first() { // Leading count of 0.
+#pragma omp split counts(0, omp_fill)
+  for (int i = 0; i < 10; ++i) // First segment tests iv < 0 from iv = 0: empty.
+    body(i);
+}
+
+extern "C" void split_three_const(int n) { // Two fixed counts, then omp_fill.
+#pragma omp split counts(2, 3, omp_fill)
+  for (int i = 0; i < n; ++i) // Loop bound is the run-time value n.
+    body(i);
+}
+
+extern "C" void split_step2(int n) { // Loop with a non-unit increment.
+#pragma omp split counts(3, omp_fill)
+  for (int i = 0; i < n; i += 2) // Increment is 2.
+    body(i);
+}
+
+extern "C" void split_decrement(int n) { // Decrementing loop, omp_fill first.
+#pragma omp split counts(omp_fill, 2)
+  for (int i = n; i > 0; --i) // Counts down from n while i > 0.
+    body(i);
+}
+
+void split_range_for() { // Range-based for; not declared extern "C".
+  int a[] = {10, 20, 30, 40}; // Four elements.
+#pragma omp split counts(2, omp_fill)
+  for (int &x : a) // Iterates the array by reference.
+    body(x);
+}
+
+#endif
+// CHECK1-LABEL: define dso_local void @body(
+// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define internal void @__cxx_global_var_init(
+// CHECK1-SAME: ) #[[ATTR1:[0-9]+]] section ".text.startup" {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    call void @_ZN1SC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @s)
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define linkonce_odr void @_ZN1SC1Ev(
+// CHECK1-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    call void @_ZN1SC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]])
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define linkonce_odr void @_ZN1SC2Ev(
+// CHECK1-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[I2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[I:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    [[I3:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    store ptr [[I3]], ptr [[I2]], align 8
+// CHECK1-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 5
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP1]], ptr @_ZZN1SC1EvE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr @_ZZN1SC1EvE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2:![0-9]+]], !align [[META3:![0-9]+]]
+// CHECK1-NEXT:    store i32 [[ADD]], ptr [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP5]])
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    store i32 5, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND4:%.*]]
+// CHECK1:       for.cond4:
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[TMP7]], 20
+// CHECK1-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]]
+// CHECK1:       for.body6:
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP8]], ptr @_ZZN1SC1EvE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr @_ZZN1SC1EvE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL7:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT:    [[ADD8:%.*]] = add nsw i32 0, [[MUL7]]
+// CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK1-NEXT:    store i32 [[ADD8]], ptr [[TMP10]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP12]])
+// CHECK1-NEXT:    br label [[FOR_INC9:%.*]]
+// CHECK1:       for.inc9:
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[INC10:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK1-NEXT:    store i32 [[INC10]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND4]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK1:       for.end11:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @split_two_const_trip(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 3
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP1]], ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP3]])
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP4]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    store i32 3, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND1:%.*]]
+// CHECK1:       for.cond1:
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10
+// CHECK1-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END8:%.*]]
+// CHECK1:       for.body3:
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP6]], ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL4:%.*]] = mul nsw i32 [[TMP7]], 1
+// CHECK1-NEXT:    [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
+// CHECK1-NEXT:    store i32 [[ADD5]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP8]])
+// CHECK1-NEXT:    br label [[FOR_INC6:%.*]]
+// CHECK1:       for.inc6:
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[INC7:%.*]] = add nsw i32 [[TMP9]], 1
+// CHECK1-NEXT:    store i32 [[INC7]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK1:       for.end8:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @split_var_trip(
+// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
+// CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], 3
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], ptr @_ZZ14split_var_tripE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ14split_var_tripE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP5]])
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    store i32 3, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND3:%.*]]
+// CHECK1:       for.cond3:
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[TMP7]], [[ADD4]]
+// CHECK1-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]]
+// CHECK1:       for.body6:
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP9]], ptr @_ZZ14split_var_tripE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ14split_var_tripE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL7:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK1-NEXT:    [[ADD8:%.*]] = add nsw i32 0, [[MUL7]]
+// CHECK1-NEXT:    store i32 [[ADD8]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP11]])
+// CHECK1-NEXT:    br label [[FOR_INC9:%.*]]
+// CHECK1:       for.inc9:
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[INC10:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK1-NEXT:    store i32 [[INC10]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND3]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK1:       for.end11:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @split_mid_fill(
+// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
+// CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], 2
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP5]])
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    store i32 2, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND3:%.*]]
+// CHECK1:       for.cond3:
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT:    [[SUB5:%.*]] = sub nsw i32 [[ADD4]], 3
+// CHECK1-NEXT:    [[CMP6:%.*]] = icmp slt i32 [[TMP7]], [[SUB5]]
+// CHECK1-NEXT:    br i1 [[CMP6]], label [[FOR_BODY7:%.*]], label [[FOR_END12:%.*]]
+// CHECK1:       for.body7:
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP9]], ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL8:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK1-NEXT:    [[ADD9:%.*]] = add nsw i32 0, [[MUL8]]
+// CHECK1-NEXT:    store i32 [[ADD9]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP11]])
+// CHECK1-NEXT:    br label [[FOR_INC10:%.*]]
+// CHECK1:       for.inc10:
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[INC11:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK1-NEXT:    store i32 [[INC11]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND3]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK1:       for.end12:
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK1-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[ADD13]], 3
+// CHECK1-NEXT:    store i32 [[SUB14]], ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND15:%.*]]
+// CHECK1:       for.cond15:
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP15]], 1
+// CHECK1-NEXT:    [[CMP17:%.*]] = icmp slt i32 [[TMP14]], [[ADD16]]
+// CHECK1-NEXT:    br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END23:%.*]]
+// CHECK1:       for.body18:
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP16]], ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL19:%.*]] = mul nsw i32 [[TMP17]], 1
+// CHECK1-NEXT:    [[ADD20:%.*]] = add nsw i32 0, [[MUL19]]
+// CHECK1-NEXT:    store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP18]])
+// CHECK1-NEXT:    br label [[FOR_INC21:%.*]]
+// CHECK1:       for.inc21:
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    [[INC22:%.*]] = add nsw i32 [[TMP19]], 1
+// CHECK1-NEXT:    store i32 [[INC22]], ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND15]], !llvm.loop [[LOOP13:![0-9]+]]
+// CHECK1:       for.end23:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @split_first_fill(
+// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
+// CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK1-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[ADD]], 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], [[SUB3]]
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP4]], ptr @_ZZ16split_first_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr @_ZZ16split_first_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1
+// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD4]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP6]])
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[ADD5]], 4
+// CHECK1-NEXT:    store i32 [[SUB6]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND7:%.*]]
+// CHECK1:       for.cond7:
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1
+// CHECK1-NEXT:    [[CMP9:%.*]] = icmp slt i32 [[TMP9]], [[ADD8]]
+// CHECK1-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]]
+// CHECK1:       for.body10:
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP11]], ptr @_ZZ16split_first_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr @_ZZ16split_first_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL11:%.*]] = mul nsw i32 [[TMP12]], 1
+// CHECK1-NEXT:    [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
+// CHECK1-NEXT:    store i32 [[ADD12]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP13]])
+// CHECK1-NEXT:    br label [[FOR_INC13:%.*]]
+// CHECK1:       for.inc13:
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[INC14:%.*]] = add nsw i32 [[TMP14]], 1
+// CHECK1-NEXT:    store i32 [[INC14]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND7]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK1:       for.end15:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @split_only_fill(
+// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
+// CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], [[ADD]]
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP4]], ptr @_ZZ15split_only_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr @_ZZ15split_only_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1
+// CHECK1-NEXT:    [[ADD3:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD3]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP6]])
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @split_neg_start(
+// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 -1, ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub i32 [[TMP1]], -2
+// CHECK1-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// CHECK1-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp ult i32 [[TMP2]], 1
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul i32 [[TMP4]], 1
+// CHECK1-NEXT:    [[ADD:%.*]] = add i32 -1, [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP5]])
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add i32 [[TMP6]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    store i32 1, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND3:%.*]]
+// CHECK1:       for.cond3:
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD4:%.*]] = add i32 [[TMP8]], 1
+// CHECK1-NEXT:    [[SUB5:%.*]] = sub i32 [[ADD4]], 1
+// CHECK1-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP7]], [[SUB5]]
+// CHECK1-NEXT:    br i1 [[CMP6]], label [[FOR_BODY7:%.*]], label [[FOR_END12:%.*]]
+// CHECK1:       for.body7:
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP9]], ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL8:%.*]] = mul i32 [[TMP10]], 1
+// CHECK1-NEXT:    [[ADD9:%.*]] = add i32 -1, [[MUL8]]
+// CHECK1-NEXT:    store i32 [[ADD9]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP11]])
+// CHECK1-NEXT:    br label [[FOR_INC10:%.*]]
+// CHECK1:       for.inc10:
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[INC11:%.*]] = add i32 [[TMP12]], 1
+// CHECK1-NEXT:    store i32 [[INC11]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND3]], !llvm.loop [[LOOP18:![0-9]+]]
+// CHECK1:       for.end12:
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD13:%.*]] = add i32 [[TMP13]], 1
+// CHECK1-NEXT:    [[SUB14:%.*]] = sub i32 [[ADD13]], 1
+// CHECK1-NEXT:    store i32 [[SUB14]], ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND15:%.*]]
+// CHECK1:       for.cond15:
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD16:%.*]] = add i32 [[TMP15]], 1
+// CHECK1-NEXT:    [[CMP17:%.*]] = icmp ult i32 [[TMP14]], [[ADD16]]
+// CHECK1-NEXT:    br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END23:%.*]]
+// CHECK1:       for.body18:
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP16]], ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL19:%.*]] = mul i32 [[TMP17]], 1
+// CHECK1-NEXT:    [[ADD20:%.*]] = add i32 -1, [[MUL19]]
+// CHECK1-NEXT:    store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP18]])
+// CHECK1-NEXT:    br label [[FOR_INC21:%.*]]
+// CHECK1:       for.inc21:
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    [[INC22:%.*]] = add i32 [[TMP19]], 1
+// CHECK1-NEXT:    store i32 [[INC22]], ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND15]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK1:       for.end23:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @split_zero_first(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 0
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP1]], ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP3]])
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP4]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND1:%.*]]
+// CHECK1:       for.cond1:
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10
+// CHECK1-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END8:%.*]]
+// CHECK1:       for.body3:
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP6]], ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL4:%.*]] = mul nsw i32 [[TMP7]], 1
+// CHECK1-NEXT:    [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
+// CHECK1-NEXT:    store i32 [[ADD5]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP8]])
+// CHECK1-NEXT:    br label [[FOR_INC6:%.*]]
+// CHECK1:       for.inc6:
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[INC7:%.*]] = add nsw i32 [[TMP9]], 1
+// CHECK1-NEXT:    store i32 [[INC7]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK1:       for.end8:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @split_three_const(
+// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
+// CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], 2
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP5]])
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    store i32 2, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND3:%.*]]
+// CHECK1:       for.cond3:
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[CMP4:%.*]] = icmp slt i32 [[TMP7]], 5
+// CHECK1-NEXT:    br i1 [[CMP4]], label [[FOR_BODY5:%.*]], label [[FOR_END10:%.*]]
+// CHECK1:       for.body5:
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP8]], ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL6:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT:    [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK1-NEXT:    store i32 [[ADD7]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP10]])
+// CHECK1-NEXT:    br label [[FOR_INC8:%.*]]
+// CHECK1:       for.inc8:
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[INC9:%.*]] = add nsw i32 [[TMP11]], 1
+// CHECK1-NEXT:    store i32 [[INC9]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND3]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK1:       for.end10:
+// CHECK1-NEXT:    store i32 5, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND11:%.*]]
+// CHECK1:       for.cond11:
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD12:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK1-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP12]], [[ADD12]]
+// CHECK1-NEXT:    br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END19:%.*]]
+// CHECK1:       for.body14:
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP14]], ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL15:%.*]] = mul nsw i32 [[TMP15]], 1
+// CHECK1-NEXT:    [[ADD16:%.*]] = add nsw i32 0, [[MUL15]]
+// CHECK1-NEXT:    store i32 [[ADD16]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP16]])
+// CHECK1-NEXT:    br label [[FOR_INC17:%.*]]
+// CHECK1:       for.inc17:
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    [[INC18:%.*]] = add nsw i32 [[TMP17]], 1
+// CHECK1-NEXT:    store i32 [[INC18]], ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND11]], !llvm.loop [[LOOP24:![0-9]+]]
+// CHECK1:       for.end19:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @split_step2(
+// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub i32 [[TMP1]], -1
+// CHECK1-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 2
+// CHECK1-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp ult i32 [[TMP2]], 3
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], ptr @_ZZ11split_step2E7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ11split_step2E7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul i32 [[TMP4]], 2
+// CHECK1-NEXT:    [[ADD:%.*]] = add i32 0, [[MUL]]
+// CHECK1-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP5]])
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add i32 [[TMP6]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    store i32 3, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND3:%.*]]
+// CHECK1:       for.cond3:
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD4:%.*]] = add i32 [[TMP8]], 1
+// CHECK1-NEXT:    [[CMP5:%.*]] = icmp ult i32 [[TMP7]], [[ADD4]]
+// CHECK1-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]]
+// CHECK1:       for.body6:
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP9]], ptr @_ZZ11split_step2E7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ11split_step2E7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL7:%.*]] = mul i32 [[TMP10]], 2
+// CHECK1-NEXT:    [[ADD8:%.*]] = add i32 0, [[MUL7]]
+// CHECK1-NEXT:    store i32 [[ADD8]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP11]])
+// CHECK1-NEXT:    br label [[FOR_INC9:%.*]]
+// CHECK1:       for.inc9:
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[INC10:%.*]] = add i32 [[TMP12]], 1
+// CHECK1-NEXT:    store i32 [[INC10]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND3]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK1:       for.end11:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @split_decrement(
+// CHECK1-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
+// CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+// CHECK1-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[ADD]], 2
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP3]], [[SUB3]]
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP5]], ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
+// CHECK1-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP6]], [[MUL]]
+// CHECK1-NEXT:    store i32 [[SUB4]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP8]])
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP9]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1
+// CHECK1-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[ADD5]], 2
+// CHECK1-NEXT:    store i32 [[SUB6]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND7:%.*]]
+// CHECK1:       for.cond7:
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK1-NEXT:    [[CMP9:%.*]] = icmp slt i32 [[TMP11]], [[ADD8]]
+// CHECK1-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]]
+// CHECK1:       for.body10:
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    store i32 [[TMP13]], ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK1-NEXT:    [[MUL11:%.*]] = mul nsw i32 [[TMP15]], 1
+// CHECK1-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP14]], [[MUL11]]
+// CHECK1-NEXT:    store i32 [[SUB12]], ptr [[I]], align 4
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP16]])
+// CHECK1-NEXT:    br label [[FOR_INC13:%.*]]
+// CHECK1:       for.inc13:
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    [[INC14:%.*]] = add nsw i32 [[TMP17]], 1
+// CHECK1-NEXT:    store i32 [[INC14]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND7]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK1:       for.end15:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @_Z15split_range_forv(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[A:%.*]] = alloca [4 x i32], align 16
+// CHECK1-NEXT:    [[__RANGE1:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__END1:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[__BEGIN1:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTSPLIT_IV_0___BEGIN1:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[X:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTSPLIT_IV_1___BEGIN1:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[X13:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr align 16 @__const._Z15split_range_forv.a, i64 16, i1 false)
+// CHECK1-NEXT:    store ptr [[A]], ptr [[__RANGE1]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK1-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP0]], i64 0, i64 0
+// CHECK1-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY]], i64 4
+// CHECK1-NEXT:    store ptr [[ADD_PTR]], ptr [[__END1]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK1-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY1]], ptr [[__BEGIN1]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK1-NEXT:    [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP2]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr [[ARRAYDECAY2]], ptr [[DOTCAPTURE_EXPR_]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[__END1]], align 8
+// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK1-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8
+// CHECK1-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64
+// CHECK1-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK1-NEXT:    [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 4
+// CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i64 [[SUB]], 1
+// CHECK1-NEXT:    [[DIV:%.*]] = sdiv i64 [[ADD]], 1
+// CHECK1-NEXT:    [[SUB5:%.*]] = sub nsw i64 [[DIV]], 1
+// CHECK1-NEXT:    store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK1-NEXT:    store i64 0, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i64, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i64 [[TMP6]], 2
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i64, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8
+// CHECK1-NEXT:    store i64 [[TMP7]], ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i64, ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP9]], 1
+// CHECK1-NEXT:    [[ADD_PTR6:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[MUL]]
+// CHECK1-NEXT:    store ptr [[ADD_PTR6]], ptr [[__BEGIN1]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
+// CHECK1-NEXT:    store ptr [[TMP10]], ptr [[X]], align 8
+// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[X]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP12]])
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i64, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i64 [[TMP13]], 1
+// CHECK1-NEXT:    store i64 [[INC]], ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    store i64 2, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
+// CHECK1-NEXT:    br label [[FOR_COND7:%.*]]
+// CHECK1:       for.cond7:
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK1-NEXT:    [[ADD8:%.*]] = add nsw i64 [[TMP15]], 1
+// CHECK1-NEXT:    [[CMP9:%.*]] = icmp slt i64 [[TMP14]], [[ADD8]]
+// CHECK1-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END16:%.*]]
+// CHECK1:       for.body10:
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
+// CHECK1-NEXT:    store i64 [[TMP16]], ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i64, ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK1-NEXT:    [[MUL11:%.*]] = mul nsw i64 [[TMP18]], 1
+// CHECK1-NEXT:    [[ADD_PTR12:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[MUL11]]
+// CHECK1-NEXT:    store ptr [[ADD_PTR12]], ptr [[__BEGIN1]], align 8
+// CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
+// CHECK1-NEXT:    store ptr [[TMP19]], ptr [[X13]], align 8
+// CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[X]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
+// CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP21]])
+// CHECK1-NEXT:    br label [[FOR_INC14:%.*]]
+// CHECK1:       for.inc14:
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
+// CHECK1-NEXT:    [[INC15:%.*]] = add nsw i64 [[TMP22]], 1
+// CHECK1-NEXT:    store i64 [[INC15]], ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
+// CHECK1-NEXT:    br label [[FOR_COND7]], !llvm.loop [[LOOP30:![0-9]+]]
+// CHECK1:       for.end16:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define internal void @_GLOBAL__sub_I_split_codegen.cpp(
+// CHECK1-SAME: ) #[[ATTR1]] section ".text.startup" {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    call void @__cxx_global_var_init()
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define internal void @__cxx_global_var_init(
+// CHECK2-SAME: ) #[[ATTR0:[0-9]+]] section ".text.startup" {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    call void @_ZN1SC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @s)
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define linkonce_odr void @_ZN1SC1Ev(
+// CHECK2-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    call void @_ZN1SC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]])
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define linkonce_odr void @_ZN1SC2Ev(
+// CHECK2-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[I2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[I:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    [[I3:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    store ptr [[I3]], ptr [[I2]], align 8
+// CHECK2-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK2:       for.cond:
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 5
+// CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK2:       for.body:
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP1]], ptr @_ZZN1SC1EvE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr @_ZZN1SC1EvE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2:![0-9]+]], !align [[META3:![0-9]+]]
+// CHECK2-NEXT:    store i32 [[ADD]], ptr [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP5]])
+// CHECK2-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK2:       for.inc:
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
+// CHECK2:       for.end:
+// CHECK2-NEXT:    store i32 5, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND4:%.*]]
+// CHECK2:       for.cond4:
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[TMP7]], 20
+// CHECK2-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]]
+// CHECK2:       for.body6:
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP8]], ptr @_ZZN1SC1EvE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr @_ZZN1SC1EvE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL7:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK2-NEXT:    [[ADD8:%.*]] = add nsw i32 0, [[MUL7]]
+// CHECK2-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK2-NEXT:    store i32 [[ADD8]], ptr [[TMP10]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP12]])
+// CHECK2-NEXT:    br label [[FOR_INC9:%.*]]
+// CHECK2:       for.inc9:
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[INC10:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK2-NEXT:    store i32 [[INC10]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND4]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK2:       for.end11:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @body(
+// CHECK2-SAME: ...) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @_Z15split_range_forv(
+// CHECK2-SAME: ) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[A:%.*]] = alloca [4 x i32], align 16
+// CHECK2-NEXT:    [[__RANGE1:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__END1:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[__BEGIN1:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTSPLIT_IV_0___BEGIN1:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[X:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTSPLIT_IV_1___BEGIN1:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[X13:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr align 16 @__const._Z15split_range_forv.a, i64 16, i1 false)
+// CHECK2-NEXT:    store ptr [[A]], ptr [[__RANGE1]], align 8
+// CHECK2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK2-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP0]], i64 0, i64 0
+// CHECK2-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY]], i64 4
+// CHECK2-NEXT:    store ptr [[ADD_PTR]], ptr [[__END1]], align 8
+// CHECK2-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK2-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY1]], ptr [[__BEGIN1]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK2-NEXT:    [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP2]], i64 0, i64 0
+// CHECK2-NEXT:    store ptr [[ARRAYDECAY2]], ptr [[DOTCAPTURE_EXPR_]], align 8
+// CHECK2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[__END1]], align 8
+// CHECK2-NEXT:    store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK2-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8
+// CHECK2-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64
+// CHECK2-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK2-NEXT:    [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i64 [[SUB]], 1
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i64 [[ADD]], 1
+// CHECK2-NEXT:    [[SUB5:%.*]] = sub nsw i64 [[DIV]], 1
+// CHECK2-NEXT:    store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK2-NEXT:    store i64 0, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8
+// CHECK2-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK2:       for.cond:
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i64, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i64 [[TMP6]], 2
+// CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK2:       for.body:
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i64, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8
+// CHECK2-NEXT:    store i64 [[TMP7]], ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK2-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i64, ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP9]], 1
+// CHECK2-NEXT:    [[ADD_PTR6:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[MUL]]
+// CHECK2-NEXT:    store ptr [[ADD_PTR6]], ptr [[__BEGIN1]], align 8
+// CHECK2-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
+// CHECK2-NEXT:    store ptr [[TMP10]], ptr [[X]], align 8
+// CHECK2-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[X]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP12]])
+// CHECK2-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK2:       for.inc:
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i64, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i64 [[TMP13]], 1
+// CHECK2-NEXT:    store i64 [[INC]], ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8
+// CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK2:       for.end:
+// CHECK2-NEXT:    store i64 2, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
+// CHECK2-NEXT:    br label [[FOR_COND7:%.*]]
+// CHECK2:       for.cond7:
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK2-NEXT:    [[ADD8:%.*]] = add nsw i64 [[TMP15]], 1
+// CHECK2-NEXT:    [[CMP9:%.*]] = icmp slt i64 [[TMP14]], [[ADD8]]
+// CHECK2-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END16:%.*]]
+// CHECK2:       for.body10:
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
+// CHECK2-NEXT:    store i64 [[TMP16]], ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK2-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i64, ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK2-NEXT:    [[MUL11:%.*]] = mul nsw i64 [[TMP18]], 1
+// CHECK2-NEXT:    [[ADD_PTR12:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[MUL11]]
+// CHECK2-NEXT:    store ptr [[ADD_PTR12]], ptr [[__BEGIN1]], align 8
+// CHECK2-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
+// CHECK2-NEXT:    store ptr [[TMP19]], ptr [[X13]], align 8
+// CHECK2-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[X]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP21]])
+// CHECK2-NEXT:    br label [[FOR_INC14:%.*]]
+// CHECK2:       for.inc14:
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
+// CHECK2-NEXT:    [[INC15:%.*]] = add nsw i64 [[TMP22]], 1
+// CHECK2-NEXT:    store i64 [[INC15]], ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
+// CHECK2-NEXT:    br label [[FOR_COND7]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK2:       for.end16:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @split_decrement(
+// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK2:       for.cond:
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+// CHECK2-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[ADD]], 2
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP3]], [[SUB3]]
+// CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK2:       for.body:
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP5]], ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
+// CHECK2-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP6]], [[MUL]]
+// CHECK2-NEXT:    store i32 [[SUB4]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP8]])
+// CHECK2-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK2:       for.inc:
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP9]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK2:       for.end:
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1
+// CHECK2-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[ADD5]], 2
+// CHECK2-NEXT:    store i32 [[SUB6]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND7:%.*]]
+// CHECK2:       for.cond7:
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK2-NEXT:    [[CMP9:%.*]] = icmp slt i32 [[TMP11]], [[ADD8]]
+// CHECK2-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]]
+// CHECK2:       for.body10:
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP13]], ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL11:%.*]] = mul nsw i32 [[TMP15]], 1
+// CHECK2-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP14]], [[MUL11]]
+// CHECK2-NEXT:    store i32 [[SUB12]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP16]])
+// CHECK2-NEXT:    br label [[FOR_INC13:%.*]]
+// CHECK2:       for.inc13:
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[INC14:%.*]] = add nsw i32 [[TMP17]], 1
+// CHECK2-NEXT:    store i32 [[INC14]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND7]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK2:       for.end15:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @split_first_fill(
+// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK2:       for.cond:
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK2-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[ADD]], 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], [[SUB3]]
+// CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK2:       for.body:
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP4]], ptr @_ZZ16split_first_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr @_ZZ16split_first_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1
+// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD4]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP6]])
+// CHECK2-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK2:       for.inc:
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
+// CHECK2:       for.end:
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK2-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[ADD5]], 4
+// CHECK2-NEXT:    store i32 [[SUB6]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND7:%.*]]
+// CHECK2:       for.cond7:
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1
+// CHECK2-NEXT:    [[CMP9:%.*]] = icmp slt i32 [[TMP9]], [[ADD8]]
+// CHECK2-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]]
+// CHECK2:       for.body10:
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP11]], ptr @_ZZ16split_first_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr @_ZZ16split_first_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL11:%.*]] = mul nsw i32 [[TMP12]], 1
+// CHECK2-NEXT:    [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
+// CHECK2-NEXT:    store i32 [[ADD12]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP13]])
+// CHECK2-NEXT:    br label [[FOR_INC13:%.*]]
+// CHECK2:       for.inc13:
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[INC14:%.*]] = add nsw i32 [[TMP14]], 1
+// CHECK2-NEXT:    store i32 [[INC14]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND7]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK2:       for.end15:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @split_mid_fill(
+// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK2:       for.cond:
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], 2
+// CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK2:       for.body:
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP5]])
+// CHECK2-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK2:       for.inc:
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
+// CHECK2:       for.end:
+// CHECK2-NEXT:    store i32 2, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND3:%.*]]
+// CHECK2:       for.cond3:
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK2-NEXT:    [[SUB5:%.*]] = sub nsw i32 [[ADD4]], 3
+// CHECK2-NEXT:    [[CMP6:%.*]] = icmp slt i32 [[TMP7]], [[SUB5]]
+// CHECK2-NEXT:    br i1 [[CMP6]], label [[FOR_BODY7:%.*]], label [[FOR_END12:%.*]]
+// CHECK2:       for.body7:
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP9]], ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL8:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK2-NEXT:    [[ADD9:%.*]] = add nsw i32 0, [[MUL8]]
+// CHECK2-NEXT:    store i32 [[ADD9]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP11]])
+// CHECK2-NEXT:    br label [[FOR_INC10:%.*]]
+// CHECK2:       for.inc10:
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[INC11:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK2-NEXT:    store i32 [[INC11]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND3]], !llvm.loop [[LOOP14:![0-9]+]]
+// CHECK2:       for.end12:
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK2-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[ADD13]], 3
+// CHECK2-NEXT:    store i32 [[SUB14]], ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND15:%.*]]
+// CHECK2:       for.cond15:
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP15]], 1
+// CHECK2-NEXT:    [[CMP17:%.*]] = icmp slt i32 [[TMP14]], [[ADD16]]
+// CHECK2-NEXT:    br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END23:%.*]]
+// CHECK2:       for.body18:
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP16]], ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL19:%.*]] = mul nsw i32 [[TMP17]], 1
+// CHECK2-NEXT:    [[ADD20:%.*]] = add nsw i32 0, [[MUL19]]
+// CHECK2-NEXT:    store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP18]])
+// CHECK2-NEXT:    br label [[FOR_INC21:%.*]]
+// CHECK2:       for.inc21:
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    [[INC22:%.*]] = add nsw i32 [[TMP19]], 1
+// CHECK2-NEXT:    store i32 [[INC22]], ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND15]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK2:       for.end23:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @split_neg_start(
+// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 -1, ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub i32 [[TMP1]], -2
+// CHECK2-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK2:       for.cond:
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp ult i32 [[TMP2]], 1
+// CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK2:       for.body:
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul i32 [[TMP4]], 1
+// CHECK2-NEXT:    [[ADD:%.*]] = add i32 -1, [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP5]])
+// CHECK2-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK2:       for.inc:
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add i32 [[TMP6]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK2:       for.end:
+// CHECK2-NEXT:    store i32 1, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND3:%.*]]
+// CHECK2:       for.cond3:
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD4:%.*]] = add i32 [[TMP8]], 1
+// CHECK2-NEXT:    [[SUB5:%.*]] = sub i32 [[ADD4]], 1
+// CHECK2-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP7]], [[SUB5]]
+// CHECK2-NEXT:    br i1 [[CMP6]], label [[FOR_BODY7:%.*]], label [[FOR_END12:%.*]]
+// CHECK2:       for.body7:
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP9]], ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL8:%.*]] = mul i32 [[TMP10]], 1
+// CHECK2-NEXT:    [[ADD9:%.*]] = add i32 -1, [[MUL8]]
+// CHECK2-NEXT:    store i32 [[ADD9]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP11]])
+// CHECK2-NEXT:    br label [[FOR_INC10:%.*]]
+// CHECK2:       for.inc10:
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[INC11:%.*]] = add i32 [[TMP12]], 1
+// CHECK2-NEXT:    store i32 [[INC11]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND3]], !llvm.loop [[LOOP17:![0-9]+]]
+// CHECK2:       for.end12:
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD13:%.*]] = add i32 [[TMP13]], 1
+// CHECK2-NEXT:    [[SUB14:%.*]] = sub i32 [[ADD13]], 1
+// CHECK2-NEXT:    store i32 [[SUB14]], ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND15:%.*]]
+// CHECK2:       for.cond15:
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD16:%.*]] = add i32 [[TMP15]], 1
+// CHECK2-NEXT:    [[CMP17:%.*]] = icmp ult i32 [[TMP14]], [[ADD16]]
+// CHECK2-NEXT:    br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END23:%.*]]
+// CHECK2:       for.body18:
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP16]], ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL19:%.*]] = mul i32 [[TMP17]], 1
+// CHECK2-NEXT:    [[ADD20:%.*]] = add i32 -1, [[MUL19]]
+// CHECK2-NEXT:    store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP18]])
+// CHECK2-NEXT:    br label [[FOR_INC21:%.*]]
+// CHECK2:       for.inc21:
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    [[INC22:%.*]] = add i32 [[TMP19]], 1
+// CHECK2-NEXT:    store i32 [[INC22]], ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND15]], !llvm.loop [[LOOP18:![0-9]+]]
+// CHECK2:       for.end23:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @split_only_fill(
+// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK2:       for.cond:
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], [[ADD]]
+// CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK2:       for.body:
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP4]], ptr @_ZZ15split_only_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr @_ZZ15split_only_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1
+// CHECK2-NEXT:    [[ADD3:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD3]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP6]])
+// CHECK2-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK2:       for.inc:
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK2:       for.end:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @split_step2(
+// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub i32 [[TMP1]], -1
+// CHECK2-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 2
+// CHECK2-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK2:       for.cond:
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp ult i32 [[TMP2]], 3
+// CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK2:       for.body:
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], ptr @_ZZ11split_step2E7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ11split_step2E7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul i32 [[TMP4]], 2
+// CHECK2-NEXT:    [[ADD:%.*]] = add i32 0, [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP5]])
+// CHECK2-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK2:       for.inc:
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add i32 [[TMP6]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
+// CHECK2:       for.end:
+// CHECK2-NEXT:    store i32 3, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND3:%.*]]
+// CHECK2:       for.cond3:
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD4:%.*]] = add i32 [[TMP8]], 1
+// CHECK2-NEXT:    [[CMP5:%.*]] = icmp ult i32 [[TMP7]], [[ADD4]]
+// CHECK2-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]]
+// CHECK2:       for.body6:
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP9]], ptr @_ZZ11split_step2E7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ11split_step2E7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL7:%.*]] = mul i32 [[TMP10]], 2
+// CHECK2-NEXT:    [[ADD8:%.*]] = add i32 0, [[MUL7]]
+// CHECK2-NEXT:    store i32 [[ADD8]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP11]])
+// CHECK2-NEXT:    br label [[FOR_INC9:%.*]]
+// CHECK2:       for.inc9:
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[INC10:%.*]] = add i32 [[TMP12]], 1
+// CHECK2-NEXT:    store i32 [[INC10]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND3]], !llvm.loop [[LOOP21:![0-9]+]]
+// CHECK2:       for.end11:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @split_three_const(
+// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK2:       for.cond:
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], 2
+// CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK2:       for.body:
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP5]])
+// CHECK2-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK2:       for.inc:
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+// CHECK2:       for.end:
+// CHECK2-NEXT:    store i32 2, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND3:%.*]]
+// CHECK2:       for.cond3:
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[CMP4:%.*]] = icmp slt i32 [[TMP7]], 5
+// CHECK2-NEXT:    br i1 [[CMP4]], label [[FOR_BODY5:%.*]], label [[FOR_END10:%.*]]
+// CHECK2:       for.body5:
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP8]], ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL6:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK2-NEXT:    [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK2-NEXT:    store i32 [[ADD7]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP10]])
+// CHECK2-NEXT:    br label [[FOR_INC8:%.*]]
+// CHECK2:       for.inc8:
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[INC9:%.*]] = add nsw i32 [[TMP11]], 1
+// CHECK2-NEXT:    store i32 [[INC9]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND3]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK2:       for.end10:
+// CHECK2-NEXT:    store i32 5, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND11:%.*]]
+// CHECK2:       for.cond11:
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD12:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK2-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP12]], [[ADD12]]
+// CHECK2-NEXT:    br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END19:%.*]]
+// CHECK2:       for.body14:
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP14]], ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL15:%.*]] = mul nsw i32 [[TMP15]], 1
+// CHECK2-NEXT:    [[ADD16:%.*]] = add nsw i32 0, [[MUL15]]
+// CHECK2-NEXT:    store i32 [[ADD16]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP16]])
+// CHECK2-NEXT:    br label [[FOR_INC17:%.*]]
+// CHECK2:       for.inc17:
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    [[INC18:%.*]] = add nsw i32 [[TMP17]], 1
+// CHECK2-NEXT:    store i32 [[INC18]], ptr [[DOTSPLIT_IV_2_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND11]], !llvm.loop [[LOOP24:![0-9]+]]
+// CHECK2:       for.end19:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @split_two_const_trip(
+// CHECK2-SAME: ) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK2:       for.cond:
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 3
+// CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK2:       for.body:
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP1]], ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP3]])
+// CHECK2-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK2:       for.inc:
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP4]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK2:       for.end:
+// CHECK2-NEXT:    store i32 3, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND1:%.*]]
+// CHECK2:       for.cond1:
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10
+// CHECK2-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END8:%.*]]
+// CHECK2:       for.body3:
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP6]], ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL4:%.*]] = mul nsw i32 [[TMP7]], 1
+// CHECK2-NEXT:    [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
+// CHECK2-NEXT:    store i32 [[ADD5]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP8]])
+// CHECK2-NEXT:    br label [[FOR_INC6:%.*]]
+// CHECK2:       for.inc6:
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[INC7:%.*]] = add nsw i32 [[TMP9]], 1
+// CHECK2-NEXT:    store i32 [[INC7]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK2:       for.end8:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @split_var_trip(
+// CHECK2-SAME: i32 noundef [[N:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK2:       for.cond:
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], 3
+// CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK2:       for.body:
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], ptr @_ZZ14split_var_tripE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ14split_var_tripE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP5]])
+// CHECK2-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK2:       for.inc:
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]]
+// CHECK2:       for.end:
+// CHECK2-NEXT:    store i32 3, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND3:%.*]]
+// CHECK2:       for.cond3:
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK2-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[TMP7]], [[ADD4]]
+// CHECK2-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]]
+// CHECK2:       for.body6:
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP9]], ptr @_ZZ14split_var_tripE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ14split_var_tripE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL7:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK2-NEXT:    [[ADD8:%.*]] = add nsw i32 0, [[MUL7]]
+// CHECK2-NEXT:    store i32 [[ADD8]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP11]])
+// CHECK2-NEXT:    br label [[FOR_INC9:%.*]]
+// CHECK2:       for.inc9:
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[INC10:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK2-NEXT:    store i32 [[INC10]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND3]], !llvm.loop [[LOOP28:![0-9]+]]
+// CHECK2:       for.end11:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @split_zero_first(
+// CHECK2-SAME: ) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK2:       for.cond:
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 0
+// CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK2:       for.body:
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP1]], ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP3]])
+// CHECK2-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK2:       for.inc:
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP4]], 1
+// CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK2:       for.end:
+// CHECK2-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND1:%.*]]
+// CHECK2:       for.cond1:
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10
+// CHECK2-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END8:%.*]]
+// CHECK2:       for.body3:
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    store i32 [[TMP6]], ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
+// CHECK2-NEXT:    [[MUL4:%.*]] = mul nsw i32 [[TMP7]], 1
+// CHECK2-NEXT:    [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
+// CHECK2-NEXT:    store i32 [[ADD5]], ptr [[I]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP8]])
+// CHECK2-NEXT:    br label [[FOR_INC6:%.*]]
+// CHECK2:       for.inc6:
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    [[INC7:%.*]] = add nsw i32 [[TMP9]], 1
+// CHECK2-NEXT:    store i32 [[INC7]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP30:![0-9]+]]
+// CHECK2:       for.end8:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define internal void @_GLOBAL__sub_I_split_codegen.cpp(
+// CHECK2-SAME: ) #[[ATTR0]] section ".text.startup" {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    call void @__cxx_global_var_init()
+// CHECK2-NEXT:    ret void
+//
+//.
+// CHECK1: [[META2]] = !{}
+// CHECK1: [[META3]] = !{i64 4}
+// CHECK1: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]]}
+// CHECK1: [[META5]] = !{!"llvm.loop.mustprogress"}
+// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META5]]}
+// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META5]]}
+// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]]}
+// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]]}
+// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]]}
+// CHECK1: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]]}
+// CHECK1: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]}
+// CHECK1: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]}
+// CHECK1: [[LOOP14]] = distinct !{[[LOOP14]], [[META5]]}
+// CHECK1: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]]}
+// CHECK1: [[LOOP16]] = distinct !{[[LOOP16]], [[META5]]}
+// CHECK1: [[LOOP17]] = distinct !{[[LOOP17]], [[META5]]}
+// CHECK1: [[LOOP18]] = distinct !{[[LOOP18]], [[META5]]}
+// CHECK1: [[LOOP19]] = distinct !{[[LOOP19]], [[META5]]}
+// CHECK1: [[LOOP20]] = distinct !{[[LOOP20]], [[META5]]}
+// CHECK1: [[LOOP21]] = distinct !{[[LOOP21]], [[META5]]}
+// CHECK1: [[LOOP22]] = distinct !{[[LOOP22]], [[META5]]}
+// CHECK1: [[LOOP23]] = distinct !{[[LOOP23]], [[META5]]}
+// CHECK1: [[LOOP24]] = distinct !{[[LOOP24]], [[META5]]}
+// CHECK1: [[LOOP25]] = distinct !{[[LOOP25]], [[META5]]}
+// CHECK1: [[LOOP26]] = distinct !{[[LOOP26]], [[META5]]}
+// CHECK1: [[LOOP27]] = distinct !{[[LOOP27]], [[META5]]}
+// CHECK1: [[LOOP28]] = distinct !{[[LOOP28]], [[META5]]}
+// CHECK1: [[LOOP29]] = distinct !{[[LOOP29]], [[META5]]}
+// CHECK1: [[LOOP30]] = distinct !{[[LOOP30]], [[META5]]}
+//.
+// CHECK2: [[META2]] = !{}
+// CHECK2: [[META3]] = !{i64 4}
+// CHECK2: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]]}
+// CHECK2: [[META5]] = !{!"llvm.loop.mustprogress"}
+// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META5]]}
+// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META5]]}
+// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]]}
+// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]]}
+// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]]}
+// CHECK2: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]]}
+// CHECK2: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]}
+// CHECK2: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]}
+// CHECK2: [[LOOP14]] = distinct !{[[LOOP14]], [[META5]]}
+// CHECK2: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]]}
+// CHECK2: [[LOOP16]] = distinct !{[[LOOP16]], [[META5]]}
+// CHECK2: [[LOOP17]] = distinct !{[[LOOP17]], [[META5]]}
+// CHECK2: [[LOOP18]] = distinct !{[[LOOP18]], [[META5]]}
+// CHECK2: [[LOOP19]] = distinct !{[[LOOP19]], [[META5]]}
+// CHECK2: [[LOOP20]] = distinct !{[[LOOP20]], [[META5]]}
+// CHECK2: [[LOOP21]] = distinct !{[[LOOP21]], [[META5]]}
+// CHECK2: [[LOOP22]] = distinct !{[[LOOP22]], [[META5]]}
+// CHECK2: [[LOOP23]] = distinct !{[[LOOP23]], [[META5]]}
+// CHECK2: [[LOOP24]] = distinct !{[[LOOP24]], [[META5]]}
+// CHECK2: [[LOOP25]] = distinct !{[[LOOP25]], [[META5]]}
+// CHECK2: [[LOOP26]] = distinct !{[[LOOP26]], [[META5]]}
+// CHECK2: [[LOOP27]] = distinct !{[[LOOP27]], [[META5]]}
+// CHECK2: [[LOOP28]] = distinct !{[[LOOP28]], [[META5]]}
+// CHECK2: [[LOOP29]] = distinct !{[[LOOP29]], [[META5]]}
+// CHECK2: [[LOOP30]] = distinct !{[[LOOP30]], [[META5]]}
+//.
diff --git a/clang/test/OpenMP/split_test.c b/clang/test/OpenMP/split_test.c
deleted file mode 100644
index af9686ce1f729..0000000000000
--- a/clang/test/OpenMP/split_test.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Simple test for #pragma omp split counts: one for-loop is transformed
- * into two loops (counts(5, omp_fill) with n=10 => [0..5) and [5..10)).
- */
-// Verify the split directive compiles and emits IR (two sequential loops).
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
-
-int main(void) {
-  const int n = 10;
-  int sum = 0;
-
-#pragma omp split counts(5, omp_fill)
-  for (int i = 0; i < n; ++i) {
-    sum += i;
-  }
-
-  return (sum == n * (n - 1) / 2) ? 0 : 1;
-}
-
-// CHECK: define
-// CHECK: load
-// Split produces two sequential loops (counts(5, omp_fill) with n=10).
-// CHECK: .split.iv
-// CHECK: icmp slt i32 {{.*}}, 5
-// CHECK: .split.iv
-// CHECK: icmp slt
-// CHECK: br i1

>From 47964f6ffa9c02a6765c39c9aa745335abf1befd Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Tue, 31 Mar 2026 06:10:07 -0400
Subject: [PATCH 25/32] diag-parse-sema tests

---
 clang/test/OpenMP/split_diag_errors.c      | 61 +++++++++++++
 clang/test/OpenMP/split_messages.cpp       | 99 ++++++++++++++++++++++
 clang/test/OpenMP/split_openmp_version.cpp | 22 +++++
 3 files changed, 182 insertions(+)
 create mode 100644 clang/test/OpenMP/split_diag_errors.c
 create mode 100644 clang/test/OpenMP/split_messages.cpp
 create mode 100644 clang/test/OpenMP/split_openmp_version.cpp

diff --git a/clang/test/OpenMP/split_diag_errors.c b/clang/test/OpenMP/split_diag_errors.c
new file mode 100644
index 0000000000000..bc71eae5655e6
--- /dev/null
+++ b/clang/test/OpenMP/split_diag_errors.c
@@ -0,0 +1,61 @@
+/*
+ * Error cases for #pragma omp split counts(...).
+ */
+// 1) Required clause missing: err_omp_required_clause
+// RUN: not %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -DTEST_REQUIRED_CLAUSE %s 2>&1 | FileCheck %s --check-prefix=REQ
+// 2) counts(negative): non-negative diagnostic
+// RUN: not %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -DTEST_NEGATIVE %s 2>&1 | FileCheck %s --check-prefix=NEG
+// 3) counts(non-integer): integral type diagnostic
+// RUN: not %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -DTEST_FLOAT %s 2>&1 | FileCheck %s --check-prefix=FLOAT
+// 4) Loop not transformable (while): must be a for loop
+// RUN: not %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -DTEST_WHILE %s 2>&1 | FileCheck %s --check-prefix=WHILE
+// 5) Two invalid counts — two diagnostics on the clause
+// RUN: not %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -DTEST_DOUBLE_NEGATIVE %s 2>&1 | FileCheck %s --check-prefix=DBLNEG
+
+#ifdef TEST_REQUIRED_CLAUSE
+void test_required_clause_missing(void) {
+#pragma omp split
+  for (int i = 0; i < 10; ++i) {
+  }
+}
+// REQ: error: {{.*}}requires the 'counts' clause
+#endif
+
+#ifdef TEST_NEGATIVE
+void test_negative_count(void) {
+#pragma omp split counts(-1, omp_fill)
+  for (int i = 0; i < 10; ++i) {
+  }
+}
+// NEG: error: {{.*}}counts{{.*}}non-negative integer
+#endif
+
+#ifdef TEST_FLOAT
+void test_float_count(void) {
+#pragma omp split counts(2.5, omp_fill)
+  for (int i = 0; i < 10; ++i) {
+  }
+}
+// FLOAT: error: {{.*}}integral or unscoped enumeration type
+#endif
+
+#ifdef TEST_WHILE
+void test_while_not_for(void) {
+  int i = 0;
+#pragma omp split counts(5, omp_fill)
+  while (i < 10) {
+    ++i;
+  }
+}
+// WHILE: error: {{.*}}must be a for loop
+#endif
+
+#ifdef TEST_DOUBLE_NEGATIVE
+void test_two_negative_counts(void) {
+#pragma omp split counts(-1, -1, omp_fill)
+  for (int i = 0; i < 10; ++i) {
+  }
+}
+// DBLNEG: error: {{.*}}counts{{.*}}non-negative integer
+// DBLNEG: error: {{.*}}counts{{.*}}non-negative integer
+#endif
diff --git a/clang/test/OpenMP/split_messages.cpp b/clang/test/OpenMP/split_messages.cpp
new file mode 100644
index 0000000000000..e559750bb0855
--- /dev/null
+++ b/clang/test/OpenMP/split_messages.cpp
@@ -0,0 +1,99 @@
+// OpenMP split / counts: parse and semantic diagnostics.
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++17 -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s
+
+void body(int);
+
+void parse_and_clause_errors() {
+
+  // Malformed `counts` — missing '('
+  // expected-error@+1 {{expected '('}}
+  #pragma omp split counts
+    ;
+
+  // Empty `counts` list
+  // expected-error@+1 {{expected expression}}
+  #pragma omp split counts()
+    ;
+
+  // Truncated list / missing ')'
+  // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+  #pragma omp split counts(3
+    for (int i = 0; i < 7; ++i)
+      ;
+
+  // Trailing comma only
+  // expected-error@+1 {{expected expression}}
+  #pragma omp split counts(3,)
+    ;
+
+  // Expression after comma missing
+  // expected-error@+2 {{expected expression}}
+  // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+  #pragma omp split counts(3,
+    ;
+
+  // Incomplete arithmetic in count (like `tile_messages` sizes(5+))
+  // expected-error@+2 {{expected expression}}
+  // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+  #pragma omp split counts(5+
+    ;
+
+  // `for` keyword not a constant-expression operand
+  // expected-error@+1 {{expected expression}}
+  #pragma omp split counts(for)
+    ;
+
+  // Duplicate `counts` clauses
+  // expected-error@+1 {{directive '#pragma omp split' cannot contain more than one 'counts' clause}}
+  #pragma omp split counts(2, omp_fill) counts(3, omp_fill)
+  for (int i = 0; i < 7; ++i)
+    ;
+
+  // Disallowed extra clause
+  // expected-error@+1 {{unexpected OpenMP clause 'collapse' in directive '#pragma omp split'}}
+  #pragma omp split counts(2, omp_fill) collapse(2)
+  for (int i = 0; i < 7; ++i)
+    ;
+
+  // Non-relational loop condition (canonical loop check)
+  #pragma omp split counts(omp_fill)
+  // expected-error@+1 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}}
+  for (int i = 0; i / 3 < 7; ++i)
+    ;
+
+  // More than one `omp_fill`
+  // expected-error@+1 {{exactly one 'omp_fill' must appear in the 'counts' clause}}
+  #pragma omp split counts(omp_fill, omp_fill)
+  for (int i = 0; i < 10; ++i)
+    body(i);
+
+  // No `omp_fill` at all — also triggers "exactly one" diagnostic.
+  // expected-error@+1 {{exactly one 'omp_fill' must appear in the 'counts' clause}}
+  #pragma omp split counts(2, 3)
+  for (int i = 0; i < 10; ++i)
+    body(i);
+
+  // Positive: `omp_fill` may appear at any position in `counts` (not required to be last).
+  #pragma omp split counts(omp_fill, 2)
+  for (int i = 0; i < 10; ++i)
+    body(i);
+}
+
+void associated_statement_diagnostics() {
+  {
+    // expected-error@+2 {{expected statement}}
+    #pragma omp split counts(omp_fill)
+  }
+
+  // Not a `for` loop (contrast `split_diag_errors.c` / `while`)
+  // expected-error@+2 {{statement after '#pragma omp split' must be a for loop}}
+  #pragma omp split counts(omp_fill)
+  int b = 0;
+
+  // expected-warning@+2 {{extra tokens at the end of '#pragma omp split' are ignored}}
+  // expected-error@+1 {{directive '#pragma omp split' requires the 'counts' clause}}
+  #pragma omp split foo
+  for (int i = 0; i < 7; ++i)
+    ;
+}
diff --git a/clang/test/OpenMP/split_openmp_version.cpp b/clang/test/OpenMP/split_openmp_version.cpp
new file mode 100644
index 0000000000000..d49d50970d0db
--- /dev/null
+++ b/clang/test/OpenMP/split_openmp_version.cpp
@@ -0,0 +1,22 @@
+// `#pragma omp split` / `counts` require OpenMP 6.x in this implementation.
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++17 -fopenmp -fopenmp-version=60 -fsyntax-only -DONLY_OK -verify %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++17 -fopenmp -fopenmp-version=52 -fsyntax-only -DONLY_BAD52 -verify=expected52 %s
+
+#if defined(ONLY_OK)
+void ok60(void) {
+#pragma omp split counts(omp_fill)
+  for (int i = 0; i < 10; ++i) {
+  }
+}
+// expected-no-diagnostics
+#endif
+
+#if defined(ONLY_BAD52)
+// expected52-error@+2 {{unexpected OpenMP clause 'counts' in directive '#pragma omp split'}}
+void bad52(void) {
+#pragma omp split counts(omp_fill)
+  for (int i = 0; i < 10; ++i) {
+  }
+}
+#endif

>From 81757212c485c226f13b04bb7272510f423b834c Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Tue, 31 Mar 2026 06:21:46 -0400
Subject: [PATCH 26/32] ast-serilize-print-etc-tests

---
 clang/test/OpenMP/split_analyze.c             |  10 ++
 clang/test/OpenMP/split_ast_print.cpp         |  60 +++++++--
 clang/test/OpenMP/split_composition.cpp       |  17 +++
 .../test/OpenMP/split_compound_associated.cpp |  13 ++
 clang/test/OpenMP/split_counts_constexpr.cpp  |  19 +++
 clang/test/OpenMP/split_counts_ice.c          |  56 ++++++++
 clang/test/OpenMP/split_counts_verify.c       | 122 +++++++++++++++---
 .../OpenMP/split_distribute_inner_split.cpp   |  14 ++
 clang/test/OpenMP/split_driver_smoke.c        |  12 ++
 clang/test/OpenMP/split_iv_types.c            |  24 ++++
 clang/test/OpenMP/split_loop_styles.cpp       |  14 ++
 clang/test/OpenMP/split_member_ctor.cpp       |  20 +++
 clang/test/OpenMP/split_nested_outer_only.c   |  12 ++
 clang/test/OpenMP/split_offload_codegen.cpp   |  27 ++++
 clang/test/OpenMP/split_opts_simd_debug.cpp   |  30 +++++
 clang/test/OpenMP/split_parallel_split.cpp    |  15 +++
 clang/test/OpenMP/split_pch_codegen.cpp       |  43 ++++++
 clang/test/OpenMP/split_range_for_diag.cpp    |  25 ++++
 clang/test/OpenMP/split_serialize_module.cpp  |  24 ++++
 clang/test/OpenMP/split_teams_nesting.cpp     |  13 ++
 clang/test/OpenMP/split_template_nttp.cpp     |  15 +++
 clang/test/OpenMP/split_templates.cpp         |  30 +++++
 clang/test/OpenMP/split_trip_volatile.c       |  14 ++
 23 files changed, 600 insertions(+), 29 deletions(-)
 create mode 100644 clang/test/OpenMP/split_analyze.c
 create mode 100644 clang/test/OpenMP/split_composition.cpp
 create mode 100644 clang/test/OpenMP/split_compound_associated.cpp
 create mode 100644 clang/test/OpenMP/split_counts_constexpr.cpp
 create mode 100644 clang/test/OpenMP/split_counts_ice.c
 create mode 100644 clang/test/OpenMP/split_distribute_inner_split.cpp
 create mode 100644 clang/test/OpenMP/split_driver_smoke.c
 create mode 100644 clang/test/OpenMP/split_iv_types.c
 create mode 100644 clang/test/OpenMP/split_loop_styles.cpp
 create mode 100644 clang/test/OpenMP/split_member_ctor.cpp
 create mode 100644 clang/test/OpenMP/split_nested_outer_only.c
 create mode 100644 clang/test/OpenMP/split_offload_codegen.cpp
 create mode 100644 clang/test/OpenMP/split_opts_simd_debug.cpp
 create mode 100644 clang/test/OpenMP/split_parallel_split.cpp
 create mode 100644 clang/test/OpenMP/split_pch_codegen.cpp
 create mode 100644 clang/test/OpenMP/split_range_for_diag.cpp
 create mode 100644 clang/test/OpenMP/split_serialize_module.cpp
 create mode 100644 clang/test/OpenMP/split_teams_nesting.cpp
 create mode 100644 clang/test/OpenMP/split_template_nttp.cpp
 create mode 100644 clang/test/OpenMP/split_templates.cpp
 create mode 100644 clang/test/OpenMP/split_trip_volatile.c

diff --git a/clang/test/OpenMP/split_analyze.c b/clang/test/OpenMP/split_analyze.c
new file mode 100644
index 0000000000000..133ec8553016e
--- /dev/null
+++ b/clang/test/OpenMP/split_analyze.c
@@ -0,0 +1,10 @@
+// Static analyzer invocation on split loop (no crash).
+// RUN: %clang -target x86_64-unknown-linux-gnu --analyze -fopenmp -fopenmp-version=60 %s -o %t.plist
+
+void g(int);
+
+void f(int n) {
+#pragma omp split counts(2, omp_fill)
+  for (int i = 0; i < n; ++i)
+    g(i);
+}
diff --git a/clang/test/OpenMP/split_ast_print.cpp b/clang/test/OpenMP/split_ast_print.cpp
index 4707d88e9328c..9673882bc6778 100644
--- a/clang/test/OpenMP/split_ast_print.cpp
+++ b/clang/test/OpenMP/split_ast_print.cpp
@@ -1,11 +1,10 @@
-// Check no warnings/errors and that split is recognized
+// AST dump + ast-print round-trip for omp_fill at every position in counts().
+//
 // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s
 // expected-no-diagnostics
-
-// Check AST: OMPSplitDirective with associated for-loop
+//
 // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP
-
-// Check unparsing
+//
 // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT
 
 #ifndef HEADER
@@ -13,12 +12,13 @@
 
 extern "C" void body(...);
 
-// PRINT-LABEL: void foo(
-// DUMP-LABEL:  FunctionDecl {{.*}} foo
-void foo(int n) {
+// --- omp_fill at last position: counts(2, omp_fill) ---
+// PRINT-LABEL: void fill_last(
+// DUMP-LABEL:  FunctionDecl {{.*}} fill_last
+void fill_last(int n) {
   // PRINT:     #pragma omp split counts(2, omp_fill)
   // DUMP:      OMPSplitDirective
-  // DUMP: OMPCountsClause
+  // DUMP:        OMPCountsClause
   #pragma omp split counts(2, omp_fill)
   // PRINT: for (int i = 0; i < n; ++i)
   // DUMP:      ForStmt
@@ -26,4 +26,46 @@ void foo(int n) {
     body(i);
 }
 
+// --- omp_fill at first position: counts(omp_fill, 3) ---
+// PRINT-LABEL: void fill_first(
+// DUMP-LABEL:  FunctionDecl {{.*}} fill_first
+void fill_first(int n) {
+  // PRINT:     #pragma omp split counts(omp_fill, 3)
+  // DUMP:      OMPSplitDirective
+  // DUMP:        OMPCountsClause
+  #pragma omp split counts(omp_fill, 3)
+  // PRINT: for (int i = 0; i < n; ++i)
+  // DUMP:      ForStmt
+  for (int i = 0; i < n; ++i)
+    body(i);
+}
+
+// --- omp_fill at middle position: counts(1, omp_fill, 1) ---
+// PRINT-LABEL: void fill_mid(
+// DUMP-LABEL:  FunctionDecl {{.*}} fill_mid
+void fill_mid(int n) {
+  // PRINT:     #pragma omp split counts(1, omp_fill, 1)
+  // DUMP:      OMPSplitDirective
+  // DUMP:        OMPCountsClause
+  #pragma omp split counts(1, omp_fill, 1)
+  // PRINT: for (int i = 0; i < n; ++i)
+  // DUMP:      ForStmt
+  for (int i = 0; i < n; ++i)
+    body(i);
+}
+
+// --- omp_fill as sole item: counts(omp_fill) ---
+// PRINT-LABEL: void fill_only(
+// DUMP-LABEL:  FunctionDecl {{.*}} fill_only
+void fill_only(int n) {
+  // PRINT:     #pragma omp split counts(omp_fill)
+  // DUMP:      OMPSplitDirective
+  // DUMP:        OMPCountsClause
+  #pragma omp split counts(omp_fill)
+  // PRINT: for (int i = 0; i < n; ++i)
+  // DUMP:      ForStmt
+  for (int i = 0; i < n; ++i)
+    body(i);
+}
+
 #endif
diff --git a/clang/test/OpenMP/split_composition.cpp b/clang/test/OpenMP/split_composition.cpp
new file mode 100644
index 0000000000000..b6c07e47466d9
--- /dev/null
+++ b/clang/test/OpenMP/split_composition.cpp
@@ -0,0 +1,17 @@
+// Split nested inside `omp parallel for` outer loop.
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s
+
+extern "C" void body(int, int);
+
+// CHECK: define {{.*}} @_Z11compositionv
+// CHECK: .split.iv
+// CHECK: call void @body
+void composition(void) {
+#pragma omp parallel for
+  for (int i = 0; i < 4; ++i) {
+#pragma omp split counts(2, omp_fill)
+    for (int j = 0; j < 10; ++j)
+      body(i, j);
+  }
+}
diff --git a/clang/test/OpenMP/split_compound_associated.cpp b/clang/test/OpenMP/split_compound_associated.cpp
new file mode 100644
index 0000000000000..125a2d16d875a
--- /dev/null
+++ b/clang/test/OpenMP/split_compound_associated.cpp
@@ -0,0 +1,13 @@
+// Associated statement may be a compound `{ for (...) {} }` (split still finds the loop).
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: define {{.*}} @_Z1fv
+// CHECK: .split.iv
+void f(void) {
+#pragma omp split counts(2, omp_fill)
+  {
+    for (int i = 0; i < 10; ++i) {
+    }
+  }
+}
diff --git a/clang/test/OpenMP/split_counts_constexpr.cpp b/clang/test/OpenMP/split_counts_constexpr.cpp
new file mode 100644
index 0000000000000..d304a9ca1b5a1
--- /dev/null
+++ b/clang/test/OpenMP/split_counts_constexpr.cpp
@@ -0,0 +1,19 @@
+/* C++ `constexpr` locals as `counts` operands (distinct from NTTP in split_template_nttp.cpp). */
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -std=c++17 -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s
+// expected-no-diagnostics
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -std=c++17 -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s
+
+extern "C" void body(int);
+
+// CHECK-LABEL: define {{.*}} @from_constexpr
+// CHECK: .split.iv.0
+// CHECK: icmp slt i32 {{.*}}, 4
+// CHECK: .split.iv.1
+// CHECK: icmp slt i32 {{.*}}, 10
+extern "C" void from_constexpr(void) {
+  static constexpr int C0 = 4;
+#pragma omp split counts(C0, omp_fill)
+  for (int i = 0; i < 10; ++i)
+    body(i);
+}
diff --git a/clang/test/OpenMP/split_counts_ice.c b/clang/test/OpenMP/split_counts_ice.c
new file mode 100644
index 0000000000000..c746ef417f049
--- /dev/null
+++ b/clang/test/OpenMP/split_counts_ice.c
@@ -0,0 +1,56 @@
+/* `counts` operands as integer constant expressions (ICEs): macros, enumerators, sizeof, macro arithmetic (not only raw literals). */
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s
+// expected-no-diagnostics
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s
+
+#define M1 2
+
+extern void body(int);
+
+// CHECK-LABEL: define {{.*}} @from_macros
+// CHECK: .split.iv.0
+// CHECK: icmp slt i32 {{.*}}, 2
+// CHECK: .split.iv.1
+// CHECK: icmp slt i32 {{.*}}, 10
+void from_macros(void) {
+#pragma omp split counts(M1, omp_fill)
+  for (int i = 0; i < 10; ++i)
+    body(i);
+}
+
+enum { EFirst = 3 };
+
+// CHECK-LABEL: define {{.*}} @from_enum
+// CHECK: .split.iv.0
+// CHECK: icmp slt i32 {{.*}}, 3
+// CHECK: .split.iv.1
+// CHECK: icmp slt i32 {{.*}}, 10
+void from_enum(void) {
+#pragma omp split counts(EFirst, omp_fill)
+  for (int i = 0; i < 10; ++i)
+    body(i);
+}
+
+// CHECK-LABEL: define {{.*}} @from_sizeof
+// CHECK: .split.iv.0
+// CHECK: icmp slt i32 {{.*}}, 1
+// CHECK: .split.iv.1
+// CHECK: icmp slt i32 {{.*}}, 10
+void from_sizeof(void) {
+#pragma omp split counts(sizeof(char), omp_fill)
+  for (int i = 0; i < 10; ++i)
+    body(i);
+}
+
+// CHECK-LABEL: define {{.*}} @from_macro_expr
+// CHECK: .split.iv.0
+// CHECK: icmp slt i32 {{.*}}, 4
+// CHECK: .split.iv.1
+// CHECK: icmp slt i32 {{.*}}, 10
+#define BASE 1
+void from_macro_expr(void) {
+#pragma omp split counts(BASE + 3, omp_fill)
+  for (int i = 0; i < 10; ++i)
+    body(i);
+}
diff --git a/clang/test/OpenMP/split_counts_verify.c b/clang/test/OpenMP/split_counts_verify.c
index 6c19999045698..0037dda44ba4a 100644
--- a/clang/test/OpenMP/split_counts_verify.c
+++ b/clang/test/OpenMP/split_counts_verify.c
@@ -1,20 +1,15 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
 /*
- * Verify #pragma omp split counts(c1, c2, ...) at AST, IR, and runtime.
+ * Verify #pragma omp split counts(c1, c2, ...) at syntax and IR levels.
  * counts(3, omp_fill, 2) with n=10 splits into: [0..3), [3..8), [8..10).
  * Sum 0+1+...+9 = 45.
+ * For end-to-end runtime tests see openmp/runtime/test/transform/split/.
  */
-// REQUIRES: x86-registered-target
 
-// 1) Syntax and semantics only
 // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s
 // expected-no-diagnostics
 
-// 2) AST dump should show OMPSplitDirective with OMPCountsClause node.
-// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-dump %s 2>&1 | FileCheck %s --check-prefix=AST
-
-// 3) Emit LLVM: three sequential loops
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=60 -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=IR
-
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
 
 int main(void) {
   const int n = 10;
@@ -27,14 +22,101 @@ int main(void) {
 
   return (sum == 45) ? 0 : 1;
 }
-
-// AST: OMPSplitDirective
-// AST: OMPCountsClause
-
-// IR: define
-// IR: .split.iv.0
-// IR: icmp slt i32 {{.*}}, 3
-// IR: .split.iv.1
-// IR: icmp slt i32 {{.*}}, 8
-// IR: .split.iv.2
-// IR: icmp slt i32 {{.*}}, 10
+// IR-LABEL: define dso_local i32 @main(
+// IR-SAME: ) #[[ATTR0:[0-9]+]] {
+// IR-NEXT:  [[ENTRY:.*:]]
+// IR-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[N:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[SUM:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[I:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4
+// IR-NEXT:    store i32 0, ptr [[RETVAL]], align 4
+// IR-NEXT:    store i32 10, ptr [[N]], align 4
+// IR-NEXT:    store i32 0, ptr [[SUM]], align 4
+// IR-NEXT:    store i32 0, ptr [[I]], align 4
+// IR-NEXT:    store i32 0, ptr [[DOTSPLIT_IV_0_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.*]]
+// IR:       [[FOR_COND]]:
+// IR-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// IR-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 3
+// IR-NEXT:    br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// IR:       [[FOR_BODY]]:
+// IR-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// IR-NEXT:    store i32 [[TMP1]], ptr @main..omp.iv, align 4
+// IR-NEXT:    [[TMP2:%.*]] = load i32, ptr @main..omp.iv, align 4
+// IR-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
+// IR-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// IR-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+// IR-NEXT:    [[TMP4:%.*]] = load i32, ptr [[SUM]], align 4
+// IR-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+// IR-NEXT:    store i32 [[ADD1]], ptr [[SUM]], align 4
+// IR-NEXT:    br label %[[FOR_INC:.*]]
+// IR:       [[FOR_INC]]:
+// IR-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
+// IR-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP5]], 1
+// IR-NEXT:    store i32 [[INC]], ptr [[DOTSPLIT_IV_0_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
+// IR:       [[FOR_END]]:
+// IR-NEXT:    store i32 3, ptr [[DOTSPLIT_IV_1_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND2:.*]]
+// IR:       [[FOR_COND2]]:
+// IR-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// IR-NEXT:    [[CMP3:%.*]] = icmp slt i32 [[TMP6]], 8
+// IR-NEXT:    br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END10:.*]]
+// IR:       [[FOR_BODY4]]:
+// IR-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// IR-NEXT:    store i32 [[TMP7]], ptr @main..omp.iv, align 4
+// IR-NEXT:    [[TMP8:%.*]] = load i32, ptr @main..omp.iv, align 4
+// IR-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[TMP8]], 1
+// IR-NEXT:    [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
+// IR-NEXT:    store i32 [[ADD6]], ptr [[I]], align 4
+// IR-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// IR-NEXT:    [[TMP10:%.*]] = load i32, ptr [[SUM]], align 4
+// IR-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// IR-NEXT:    store i32 [[ADD7]], ptr [[SUM]], align 4
+// IR-NEXT:    br label %[[FOR_INC8:.*]]
+// IR:       [[FOR_INC8]]:
+// IR-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
+// IR-NEXT:    [[INC9:%.*]] = add nsw i32 [[TMP11]], 1
+// IR-NEXT:    store i32 [[INC9]], ptr [[DOTSPLIT_IV_1_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND2]], !llvm.loop [[LOOP4:![0-9]+]]
+// IR:       [[FOR_END10]]:
+// IR-NEXT:    store i32 8, ptr [[DOTSPLIT_IV_2_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND11:.*]]
+// IR:       [[FOR_COND11]]:
+// IR-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// IR-NEXT:    [[CMP12:%.*]] = icmp slt i32 [[TMP12]], 10
+// IR-NEXT:    br i1 [[CMP12]], label %[[FOR_BODY13:.*]], label %[[FOR_END19:.*]]
+// IR:       [[FOR_BODY13]]:
+// IR-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// IR-NEXT:    store i32 [[TMP13]], ptr @main..omp.iv, align 4
+// IR-NEXT:    [[TMP14:%.*]] = load i32, ptr @main..omp.iv, align 4
+// IR-NEXT:    [[MUL14:%.*]] = mul nsw i32 [[TMP14]], 1
+// IR-NEXT:    [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
+// IR-NEXT:    store i32 [[ADD15]], ptr [[I]], align 4
+// IR-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4
+// IR-NEXT:    [[TMP16:%.*]] = load i32, ptr [[SUM]], align 4
+// IR-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP16]], [[TMP15]]
+// IR-NEXT:    store i32 [[ADD16]], ptr [[SUM]], align 4
+// IR-NEXT:    br label %[[FOR_INC17:.*]]
+// IR:       [[FOR_INC17]]:
+// IR-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
+// IR-NEXT:    [[INC18:%.*]] = add nsw i32 [[TMP17]], 1
+// IR-NEXT:    store i32 [[INC18]], ptr [[DOTSPLIT_IV_2_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND11]], !llvm.loop [[LOOP5:![0-9]+]]
+// IR:       [[FOR_END19]]:
+// IR-NEXT:    [[TMP18:%.*]] = load i32, ptr [[SUM]], align 4
+// IR-NEXT:    [[CMP20:%.*]] = icmp eq i32 [[TMP18]], 45
+// IR-NEXT:    [[TMP19:%.*]] = zext i1 [[CMP20]] to i64
+// IR-NEXT:    [[COND:%.*]] = select i1 [[CMP20]], i32 0, i32 1
+// IR-NEXT:    ret i32 [[COND]]
+//
+//.
+// IR: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
+// IR: [[META3]] = !{!"llvm.loop.mustprogress"}
+// IR: [[LOOP4]] = distinct !{[[LOOP4]], [[META3]]}
+// IR: [[LOOP5]] = distinct !{[[LOOP5]], [[META3]]}
+//.
diff --git a/clang/test/OpenMP/split_distribute_inner_split.cpp b/clang/test/OpenMP/split_distribute_inner_split.cpp
new file mode 100644
index 0000000000000..9134ec7d8e0de
--- /dev/null
+++ b/clang/test/OpenMP/split_distribute_inner_split.cpp
@@ -0,0 +1,14 @@
+// `distribute` outer loop with inner `split` (combined-construct interop beyond host `teams` case).
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: define {{.*}} @_Z1fv
+// CHECK: .split.iv
+void f(void) {
+#pragma omp distribute
+  for (int i = 0; i < 10; ++i) {
+#pragma omp split counts(2, omp_fill)
+    for (int j = 0; j < 10; ++j) {
+    }
+  }
+}
diff --git a/clang/test/OpenMP/split_driver_smoke.c b/clang/test/OpenMP/split_driver_smoke.c
new file mode 100644
index 0000000000000..5969611c9b425
--- /dev/null
+++ b/clang/test/OpenMP/split_driver_smoke.c
@@ -0,0 +1,12 @@
+// Driver forwards `-fopenmp-version=60` with split source (`###` only — no link).
+// REQUIRES: x86-registered-target
+//
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-version=60 -c %s -o %t.o 2>&1 | FileCheck %s --check-prefix=INVOC
+
+void f(int n) {
+#pragma omp split counts(2, omp_fill)
+  for (int i = 0; i < n; ++i) {
+  }
+}
+
+// INVOC: -fopenmp-version=60
diff --git a/clang/test/OpenMP/split_iv_types.c b/clang/test/OpenMP/split_iv_types.c
new file mode 100644
index 0000000000000..76606f09fc427
--- /dev/null
+++ b/clang/test/OpenMP/split_iv_types.c
@@ -0,0 +1,24 @@
+/* Non-int IV types with split. */
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s --check-prefix=U32
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s --check-prefix=I64
+
+extern void body(unsigned int);
+extern void body64(long);
+
+// U32-LABEL: define {{.*}} @unsigned_iv
+// U32: .split.iv
+// U32-DAG: icmp ult i32
+void unsigned_iv(void) {
+#pragma omp split counts(2, omp_fill)
+  for (unsigned i = 0; i < 10U; ++i)
+    body(i);
+}
+
+// I64-LABEL: define {{.*}} @long_iv
+// I64: .split.iv
+// I64-DAG: icmp slt i64
+void long_iv(void) {
+#pragma omp split counts(2, omp_fill)
+  for (long i = 0; i < 10L; ++i)
+    body64(i);
+}
diff --git a/clang/test/OpenMP/split_loop_styles.cpp b/clang/test/OpenMP/split_loop_styles.cpp
new file mode 100644
index 0000000000000..ea0bf3d571e1b
--- /dev/null
+++ b/clang/test/OpenMP/split_loop_styles.cpp
@@ -0,0 +1,14 @@
+// Outer-declared iteration variable + split.
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s
+
+extern "C" void body(int);
+
+// CHECK-LABEL: define {{.*}} @_Z8outer_iv
+// CHECK: .split.iv
+void outer_iv(int n) {
+  int i;
+#pragma omp split counts(3, omp_fill)
+  for (i = 0; i < n; ++i)
+    body(i);
+}
diff --git a/clang/test/OpenMP/split_member_ctor.cpp b/clang/test/OpenMP/split_member_ctor.cpp
new file mode 100644
index 0000000000000..e869602e1a84f
--- /dev/null
+++ b/clang/test/OpenMP/split_member_ctor.cpp
@@ -0,0 +1,20 @@
+// Split on loop in constructor using member-related bound.
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s
+
+extern "C" void body(int);
+
+struct S {
+  int n;
+  S() : n(10) {
+#pragma omp split counts(3, omp_fill)
+    for (int i = 0; i < n; ++i)
+      body(i);
+  }
+};
+
+// CHECK-LABEL: define {{.*}} @_ZN1SC1Ev
+// CHECK: .split.iv
+void use_s() {
+  S s;
+}
diff --git a/clang/test/OpenMP/split_nested_outer_only.c b/clang/test/OpenMP/split_nested_outer_only.c
new file mode 100644
index 0000000000000..578a11212c658
--- /dev/null
+++ b/clang/test/OpenMP/split_nested_outer_only.c
@@ -0,0 +1,12 @@
+// Split attaches to the outer canonical `for`; inner loop stays unsplit.
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s
+
+// Only the outer loop is split; the inner `for` keeps plain `j` control flow. Note: CHECK-COUNT-1 requires one match but does not reject later ones.
+// CHECK-COUNT-1: .split.iv
+void f(void) {
+#pragma omp split counts(omp_fill)
+  for (int i = 0; i < 4; ++i)
+    for (int j = 0; j < 4; ++j) {
+    }
+}
diff --git a/clang/test/OpenMP/split_offload_codegen.cpp b/clang/test/OpenMP/split_offload_codegen.cpp
new file mode 100644
index 0000000000000..d212fdad14520
--- /dev/null
+++ b/clang/test/OpenMP/split_offload_codegen.cpp
@@ -0,0 +1,27 @@
+// Split inside `#pragma omp target` — host and device IR show `.split.iv`.
+//
+// RUN: %clang_cc1 -DCK_SPLIT -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - 2>&1 | FileCheck -check-prefix=HOST %s
+// RUN: %clang_cc1 -DCK_SPLIT -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-split-host.bc
+// RUN: %clang_cc1 -DCK_SPLIT -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-split-host.bc -o - 2>&1 | FileCheck -check-prefix=DEVICE %s
+
+// expected-no-diagnostics
+
+#ifdef CK_SPLIT
+extern "C" void body(int);
+
+void host_split_in_target(int n) {
+#pragma omp target map(to : n)
+  {
+#pragma omp split counts(2, omp_fill)
+    for (int i = 0; i < n; ++i)
+      body(i);
+  }
+}
+
+// HOST: define {{.*}}void {{.*}}host_split_in_target
+// HOST: .split.iv
+// HOST: __tgt_target_kernel
+
+// DEVICE: define {{.*}}void @__omp_offloading_
+// DEVICE: .split.iv
+#endif
diff --git a/clang/test/OpenMP/split_opts_simd_debug.cpp b/clang/test/OpenMP/split_opts_simd_debug.cpp
new file mode 100644
index 0000000000000..d257aa0e2ef5e
--- /dev/null
+++ b/clang/test/OpenMP/split_opts_simd_debug.cpp
@@ -0,0 +1,30 @@
+// Optimized split IR at -O1; split with `-fopenmp-simd` is checked with -fsyntax-only only (host -emit-llvm can crash); debug-info (-debug-info-kind=limited) smoke test.
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O1 -emit-llvm -DTEST_BODY %s -o - | FileCheck %s --check-prefix=O1
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp-simd -fopenmp-version=60 -fsyntax-only -verify -DTEST_SIMD %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm -debug-info-kind=limited -DTEST_BODY %s -o - | FileCheck %s --check-prefix=DBG
+
+extern "C" void body(int);
+
+#if defined(TEST_SIMD)
+// expected-no-diagnostics
+void simd_ok(int n) {
+#pragma omp split counts(2, omp_fill)
+  for (int i = 0; i < n; ++i)
+    body(i);
+}
+#endif
+
+#if defined(TEST_BODY)
+// O1-LABEL: define {{.*}} @_Z4testi
+// O1: .split.iv
+// DBG-LABEL: define {{.*}} @_Z4testi
+// DBG: .split.iv
+// DBG: !dbg
+void test(int n) {
+#pragma omp split counts(2, omp_fill)
+  for (int i = 0; i < n; ++i)
+    body(i);
+}
+#endif
+
diff --git a/clang/test/OpenMP/split_parallel_split.cpp b/clang/test/OpenMP/split_parallel_split.cpp
new file mode 100644
index 0000000000000..dd520e46c50d2
--- /dev/null
+++ b/clang/test/OpenMP/split_parallel_split.cpp
@@ -0,0 +1,15 @@
+// Valid nesting — `split` inside `omp parallel` (contrast `teams` rejection in split_teams_nesting.cpp).
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: define {{.*}} @_Z1fv
+// CHECK: __kmpc_fork_call
+// CHECK: .split.iv
+void f(void) {
+#pragma omp parallel
+  {
+#pragma omp split counts(2, omp_fill)
+    for (int i = 0; i < 10; ++i) {
+    }
+  }
+}
diff --git a/clang/test/OpenMP/split_pch_codegen.cpp b/clang/test/OpenMP/split_pch_codegen.cpp
new file mode 100644
index 0000000000000..c31028bebe5e0
--- /dev/null
+++ b/clang/test/OpenMP/split_pch_codegen.cpp
@@ -0,0 +1,43 @@
+// PCH round-trip for AST dump/print and host IR (split + counts).
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s
+// expected-no-diagnostics
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -emit-pch -o %t %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2
+
+#ifndef HEADER
+#define HEADER
+
+extern "C" void body(int);
+
+// PRINT-LABEL: void foo(
+// DUMP-LABEL:  FunctionDecl {{.*}} foo
+void foo(int n) {
+  // PRINT:     #pragma omp split counts(3, omp_fill)
+  // DUMP:      OMPSplitDirective
+  // DUMP-NEXT:   OMPCountsClause
+  // DUMP: IntegerLiteral {{.*}} 3
+#pragma omp split counts(3, omp_fill)
+  // DUMP: ForStmt
+  for (int i = 0; i < n; ++i)
+    body(i);
+}
+
+// CHECK1-LABEL: define {{.*}} @_Z3foo
+// CHECK1: .split.iv
+// CHECK1: icmp
+// CHECK1: call void @body
+
+// CHECK2-LABEL: define {{.*}} @_Z3foo
+// CHECK2: .split.iv
+// CHECK2: icmp
+// CHECK2: call void @body
+
+#endif /* HEADER */
diff --git a/clang/test/OpenMP/split_range_for_diag.cpp b/clang/test/OpenMP/split_range_for_diag.cpp
new file mode 100644
index 0000000000000..9752dd6884966
--- /dev/null
+++ b/clang/test/OpenMP/split_range_for_diag.cpp
@@ -0,0 +1,25 @@
+// C++ range-for + split: verify syntax, IR, and PreInits (range evaluated once).
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -std=c++17 -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s
+// expected-no-diagnostics
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -std=c++17 -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s
+
+extern "C" void body(int);
+
+// CHECK-LABEL: define dso_local void @_Z10range_fillv
+// CHECK: __range
+// CHECK: __begin
+// CHECK: __end
+// CHECK: .split.iv.0
+// CHECK: icmp slt i64 {{.*}}, 2
+// CHECK: call void @body
+// CHECK: .split.iv.1
+// CHECK: icmp slt
+// CHECK: call void @body
+void range_fill() {
+  int a[] = {10, 20, 30, 40};
+#pragma omp split counts(2, omp_fill)
+  for (int &x : a)
+    body(x);
+}
diff --git a/clang/test/OpenMP/split_serialize_module.cpp b/clang/test/OpenMP/split_serialize_module.cpp
new file mode 100644
index 0000000000000..861e9a8bb8034
--- /dev/null
+++ b/clang/test/OpenMP/split_serialize_module.cpp
@@ -0,0 +1,24 @@
+// C++20 module interface with `#pragma omp split` — emit BMI + import; AST retains directive.
+//
+// RUN: rm -rf %t && split-file %s %t && cd %t
+// RUN: %clang_cc1 -std=c++20 -fopenmp -fopenmp-version=60 -triple x86_64-unknown-linux-gnu %t/SplitMod.cppm -emit-module-interface -o %t/SplitMod.pcm
+// RUN: %clang_cc1 -std=c++20 -fopenmp -fopenmp-version=60 -triple x86_64-unknown-linux-gnu %t/UseSplitMod.cpp -fmodule-file=SplitMod=%t/SplitMod.pcm -ast-dump-all | FileCheck %t/SplitMod.cppm
+
+// expected-no-diagnostics
+
+//--- SplitMod.cppm
+module;
+export module SplitMod;
+
+export void splitfoo(int n) {
+// CHECK: OMPSplitDirective
+// CHECK: OMPCountsClause
+#pragma omp split counts(2, omp_fill)
+  for (int i = 0; i < n; ++i) {
+  }
+}
+
+//--- UseSplitMod.cpp
+import SplitMod;
+
+void g(void) { splitfoo(10); }
diff --git a/clang/test/OpenMP/split_teams_nesting.cpp b/clang/test/OpenMP/split_teams_nesting.cpp
new file mode 100644
index 0000000000000..1120a7ccae671
--- /dev/null
+++ b/clang/test/OpenMP/split_teams_nesting.cpp
@@ -0,0 +1,13 @@
+// Split is not valid nested inside `teams` (host diagnostic).
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s
+
+void g(void) {
+#pragma omp teams
+  {
+// expected-error@+1 {{region cannot be closely nested inside 'teams' region}}
+#pragma omp split counts(2, omp_fill)
+    for (int i = 0; i < 10; ++i) {
+    }
+  }
+}
diff --git a/clang/test/OpenMP/split_template_nttp.cpp b/clang/test/OpenMP/split_template_nttp.cpp
new file mode 100644
index 0000000000000..1cf0ee39c3c73
--- /dev/null
+++ b/clang/test/OpenMP/split_template_nttp.cpp
@@ -0,0 +1,15 @@
+// Non-type template parameter as counts operand — IR after instantiation.
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -std=c++17 -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: define {{.*}} @_Z1fILi5EEvv
+// CHECK: .split.iv
+// CHECK: icmp slt i32{{.*}} 5
+template <int N>
+void f() {
+#pragma omp split counts(N, omp_fill)
+  for (int i = 0; i < 20; ++i) {
+  }
+}
+
+template void f<5>();
diff --git a/clang/test/OpenMP/split_templates.cpp b/clang/test/OpenMP/split_templates.cpp
new file mode 100644
index 0000000000000..f6a4dfbfdc81b
--- /dev/null
+++ b/clang/test/OpenMP/split_templates.cpp
@@ -0,0 +1,30 @@
+// Dependent template defers transformation; explicit instantiation emits IR.
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -std=c++17 -fopenmp -fopenmp-version=60 -ast-dump -DTEST_DEP %s | FileCheck %s --check-prefix=DEP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -std=c++17 -fopenmp -fopenmp-version=60 -O0 -emit-llvm -DTEST_INST %s -o - | FileCheck %s --check-prefix=LLVM
+
+extern "C" void body(int);
+
+#if defined(TEST_DEP)
+template <typename T>
+void dep_split(T n) {
+#pragma omp split counts(2, omp_fill)
+  for (T i = 0; i < n; ++i)
+    body((int)i);
+}
+// DEP-LABEL: dep_split
+// DEP: OMPSplitDirective
+// DEP: ForStmt
+#endif
+
+#if defined(TEST_INST)
+template <typename T>
+void dep_split(T n) {
+#pragma omp split counts(2, omp_fill)
+  for (T i = 0; i < n; ++i)
+    body((int)i);
+}
+template void dep_split<int>(int);
+// LLVM: .split.iv
+// LLVM: call void @body
+#endif
diff --git a/clang/test/OpenMP/split_trip_volatile.c b/clang/test/OpenMP/split_trip_volatile.c
new file mode 100644
index 0000000000000..01b5e7f534d98
--- /dev/null
+++ b/clang/test/OpenMP/split_trip_volatile.c
@@ -0,0 +1,14 @@
+// Volatile trip count — IR shows `load volatile` of bound + split IVs (omp_fill segment).
+//
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s
+
+volatile int n;
+
+// CHECK-LABEL: define {{.*}} @f
+// CHECK: load volatile i32, ptr @n
+// CHECK: .split.iv
+void f(void) {
+#pragma omp split counts(2, omp_fill)
+  for (int i = 0; i < n; ++i) {
+  }
+}

>From c85a63d315e41db2b06a031452d46659f63b8448 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Tue, 31 Mar 2026 07:09:02 -0400
Subject: [PATCH 27/32] range-for fix

---
 clang/lib/Sema/SemaOpenMP.cpp              | 20 ++++++--
 clang/test/OpenMP/split_codegen.cpp        | 56 +++++++++++-----------
 clang/test/OpenMP/split_range_for_diag.cpp |  2 +-
 3 files changed, 45 insertions(+), 33 deletions(-)

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index b602187309989..371ad3faaeff8 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -16102,9 +16102,23 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
       return StmtError();
 
     SmallVector<Stmt *, 4> BodyStmts;
-    BodyStmts.push_back(UpdateExpr.get());
-    if (auto *CXXRangeFor = dyn_cast<CXXForRangeStmt>(LoopStmt))
-      BodyStmts.push_back(CXXRangeFor->getLoopVarStmt());
+    BodyStmts.push_back(IVAssign.get());
+    BodyStmts.append(LoopHelper.Updates.begin(), LoopHelper.Updates.end());
+    if (auto *CXXRangeFor = dyn_cast<CXXForRangeStmt>(LoopStmt)) {
+      if (Seg == 0) {
+        BodyStmts.push_back(CXXRangeFor->getLoopVarStmt());
+      } else {
+        VarDecl *LoopVar = CXXRangeFor->getLoopVariable();
+        DeclRefExpr *LVRef = buildDeclRefExpr(
+            SemaRef, LoopVar, LoopVar->getType().getNonReferenceType(),
+            OrigVarLoc);
+        ExprResult LVAssign = SemaRef.BuildBinOp(
+            CurScope, OrigVarLoc, BO_Assign, LVRef, LoopVar->getInit());
+        if (!LVAssign.isUsable())
+          return StmtError();
+        BodyStmts.push_back(LVAssign.get());
+      }
+    }
     BodyStmts.push_back(Body);
 
     auto *LoopBody =
diff --git a/clang/test/OpenMP/split_codegen.cpp b/clang/test/OpenMP/split_codegen.cpp
index 44eb95fbe0a9f..257090de0fdfa 100644
--- a/clang/test/OpenMP/split_codegen.cpp
+++ b/clang/test/OpenMP/split_codegen.cpp
@@ -83,7 +83,7 @@ extern "C" void split_decrement(int n) {
 void split_range_for() {
   int a[] = {10, 20, 30, 40};
 #pragma omp split counts(2, omp_fill)
-  for (int &x : a)
+  for (int x : a)
     body(x);
 }
 
@@ -903,9 +903,8 @@ void split_range_for() {
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTSPLIT_IV_0___BEGIN1:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[X:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[X:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_1___BEGIN1:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[X13:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr align 16 @__const._Z15split_range_forv.a, i64 16, i1 false)
 // CHECK1-NEXT:    store ptr [[A]], ptr [[__RANGE1]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !nonnull [[META2]], !align [[META3]]
@@ -946,9 +945,9 @@ void split_range_for() {
 // CHECK1-NEXT:    [[ADD_PTR6:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[MUL]]
 // CHECK1-NEXT:    store ptr [[ADD_PTR6]], ptr [[__BEGIN1]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
-// CHECK1-NEXT:    store ptr [[TMP10]], ptr [[X]], align 8
-// CHECK1-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[X]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// CHECK1-NEXT:    store i32 [[TMP11]], ptr [[X]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[X]], align 4
 // CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP12]])
 // CHECK1-NEXT:    br label [[FOR_INC:%.*]]
 // CHECK1:       for.inc:
@@ -964,7 +963,7 @@ void split_range_for() {
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8
 // CHECK1-NEXT:    [[ADD8:%.*]] = add nsw i64 [[TMP15]], 1
 // CHECK1-NEXT:    [[CMP9:%.*]] = icmp slt i64 [[TMP14]], [[ADD8]]
-// CHECK1-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END16:%.*]]
+// CHECK1-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]]
 // CHECK1:       for.body10:
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
 // CHECK1-NEXT:    store i64 [[TMP16]], ptr @_ZZ15split_range_forvE7.omp.iv, align 8
@@ -974,17 +973,17 @@ void split_range_for() {
 // CHECK1-NEXT:    [[ADD_PTR12:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[MUL11]]
 // CHECK1-NEXT:    store ptr [[ADD_PTR12]], ptr [[__BEGIN1]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
-// CHECK1-NEXT:    store ptr [[TMP19]], ptr [[X13]], align 8
-// CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[X]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
+// CHECK1-NEXT:    store i32 [[TMP20]], ptr [[X]], align 4
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, ptr [[X]], align 4
 // CHECK1-NEXT:    call void (...) @body(i32 noundef [[TMP21]])
-// CHECK1-NEXT:    br label [[FOR_INC14:%.*]]
-// CHECK1:       for.inc14:
+// CHECK1-NEXT:    br label [[FOR_INC13:%.*]]
+// CHECK1:       for.inc13:
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
-// CHECK1-NEXT:    [[INC15:%.*]] = add nsw i64 [[TMP22]], 1
-// CHECK1-NEXT:    store i64 [[INC15]], ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
+// CHECK1-NEXT:    [[INC14:%.*]] = add nsw i64 [[TMP22]], 1
+// CHECK1-NEXT:    store i64 [[INC14]], ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
 // CHECK1-NEXT:    br label [[FOR_COND7]], !llvm.loop [[LOOP30:![0-9]+]]
-// CHECK1:       for.end16:
+// CHECK1:       for.end15:
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1093,9 +1092,8 @@ void split_range_for() {
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[DOTSPLIT_IV_0___BEGIN1:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[X:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[X:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_1___BEGIN1:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[X13:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr align 16 @__const._Z15split_range_forv.a, i64 16, i1 false)
 // CHECK2-NEXT:    store ptr [[A]], ptr [[__RANGE1]], align 8
 // CHECK2-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !nonnull [[META2]], !align [[META3]]
@@ -1136,9 +1134,9 @@ void split_range_for() {
 // CHECK2-NEXT:    [[ADD_PTR6:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[MUL]]
 // CHECK2-NEXT:    store ptr [[ADD_PTR6]], ptr [[__BEGIN1]], align 8
 // CHECK2-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
-// CHECK2-NEXT:    store ptr [[TMP10]], ptr [[X]], align 8
-// CHECK2-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[X]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// CHECK2-NEXT:    store i32 [[TMP11]], ptr [[X]], align 4
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[X]], align 4
 // CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP12]])
 // CHECK2-NEXT:    br label [[FOR_INC:%.*]]
 // CHECK2:       for.inc:
@@ -1154,7 +1152,7 @@ void split_range_for() {
 // CHECK2-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8
 // CHECK2-NEXT:    [[ADD8:%.*]] = add nsw i64 [[TMP15]], 1
 // CHECK2-NEXT:    [[CMP9:%.*]] = icmp slt i64 [[TMP14]], [[ADD8]]
-// CHECK2-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END16:%.*]]
+// CHECK2-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]]
 // CHECK2:       for.body10:
 // CHECK2-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
 // CHECK2-NEXT:    store i64 [[TMP16]], ptr @_ZZ15split_range_forvE7.omp.iv, align 8
@@ -1164,17 +1162,17 @@ void split_range_for() {
 // CHECK2-NEXT:    [[ADD_PTR12:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[MUL11]]
 // CHECK2-NEXT:    store ptr [[ADD_PTR12]], ptr [[__BEGIN1]], align 8
 // CHECK2-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
-// CHECK2-NEXT:    store ptr [[TMP19]], ptr [[X13]], align 8
-// CHECK2-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[X]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
+// CHECK2-NEXT:    store i32 [[TMP20]], ptr [[X]], align 4
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[X]], align 4
 // CHECK2-NEXT:    call void (...) @body(i32 noundef [[TMP21]])
-// CHECK2-NEXT:    br label [[FOR_INC14:%.*]]
-// CHECK2:       for.inc14:
+// CHECK2-NEXT:    br label [[FOR_INC13:%.*]]
+// CHECK2:       for.inc13:
 // CHECK2-NEXT:    [[TMP22:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
-// CHECK2-NEXT:    [[INC15:%.*]] = add nsw i64 [[TMP22]], 1
-// CHECK2-NEXT:    store i64 [[INC15]], ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
+// CHECK2-NEXT:    [[INC14:%.*]] = add nsw i64 [[TMP22]], 1
+// CHECK2-NEXT:    store i64 [[INC14]], ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
 // CHECK2-NEXT:    br label [[FOR_COND7]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK2:       for.end16:
+// CHECK2:       for.end15:
 // CHECK2-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/split_range_for_diag.cpp b/clang/test/OpenMP/split_range_for_diag.cpp
index 9752dd6884966..2c6a4b50d84bd 100644
--- a/clang/test/OpenMP/split_range_for_diag.cpp
+++ b/clang/test/OpenMP/split_range_for_diag.cpp
@@ -20,6 +20,6 @@ extern "C" void body(int);
 void range_fill() {
   int a[] = {10, 20, 30, 40};
 #pragma omp split counts(2, omp_fill)
-  for (int &x : a)
+  for (int x : a)
     body(x);
 }

>From 7c8a6a98afd46644acba91b9ba10bc0ff400cf67 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Tue, 31 Mar 2026 07:11:10 -0400
Subject: [PATCH 28/32] runtime-tests

---
 .../runtime/test/transform/split/fill_first.c | 28 ++++++++++++++
 .../runtime/test/transform/split/foreach.cpp  | 29 +++++++++++++++
 openmp/runtime/test/transform/split/intfor.c  | 31 ++++++++++++++++
 .../test/transform/split/intfor_negstart.c    | 32 ++++++++++++++++
 .../runtime/test/transform/split/iterfor.cpp  | 25 +++++++++++++
 .../runtime/test/transform/split/leq_bound.c  | 27 ++++++++++++++
 .../transform/split/parallel-split-intfor.c   | 32 ++++++++++++++++
 .../test/transform/split/single_fill.c        | 28 ++++++++++++++
 .../test/transform/split/three_segments.c     | 31 ++++++++++++++++
 .../runtime/test/transform/split/trip_one.c   | 37 +++++++++++++++++++
 .../test/transform/split/unsigned_iv.c        | 29 +++++++++++++++
 .../test/transform/split/zero_first_segment.c | 26 +++++++++++++
 12 files changed, 355 insertions(+)
 create mode 100644 openmp/runtime/test/transform/split/fill_first.c
 create mode 100644 openmp/runtime/test/transform/split/foreach.cpp
 create mode 100644 openmp/runtime/test/transform/split/intfor.c
 create mode 100644 openmp/runtime/test/transform/split/intfor_negstart.c
 create mode 100644 openmp/runtime/test/transform/split/iterfor.cpp
 create mode 100644 openmp/runtime/test/transform/split/leq_bound.c
 create mode 100644 openmp/runtime/test/transform/split/parallel-split-intfor.c
 create mode 100644 openmp/runtime/test/transform/split/single_fill.c
 create mode 100644 openmp/runtime/test/transform/split/three_segments.c
 create mode 100644 openmp/runtime/test/transform/split/trip_one.c
 create mode 100644 openmp/runtime/test/transform/split/unsigned_iv.c
 create mode 100644 openmp/runtime/test/transform/split/zero_first_segment.c

diff --git a/openmp/runtime/test/transform/split/fill_first.c b/openmp/runtime/test/transform/split/fill_first.c
new file mode 100644
index 0000000000000..7835d2caf58e8
--- /dev/null
+++ b/openmp/runtime/test/transform/split/fill_first.c
@@ -0,0 +1,28 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+  printf("do\n");
+#pragma omp split counts(omp_fill, 2)
+  for (int i = 0; i < 7; ++i)
+    printf("i=%d\n", i);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK:      do
+// CHECK-NEXT: i=0
+// CHECK-NEXT: i=1
+// CHECK-NEXT: i=2
+// CHECK-NEXT: i=3
+// CHECK-NEXT: i=4
+// CHECK-NEXT: i=5
+// CHECK-NEXT: i=6
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/split/foreach.cpp b/openmp/runtime/test/transform/split/foreach.cpp
new file mode 100644
index 0000000000000..e8f3ddc9a56ed
--- /dev/null
+++ b/openmp/runtime/test/transform/split/foreach.cpp
@@ -0,0 +1,29 @@
+// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdio>
+#include <vector>
+
+int main() {
+  std::vector<int> v = {10, 20, 30, 40, 50, 60};
+  printf("do\n");
+#pragma omp split counts(2, omp_fill)
+  for (int x : v)
+    printf("x=%d\n", x);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK:      do
+// CHECK-NEXT: x=10
+// CHECK-NEXT: x=20
+// CHECK-NEXT: x=30
+// CHECK-NEXT: x=40
+// CHECK-NEXT: x=50
+// CHECK-NEXT: x=60
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/split/intfor.c b/openmp/runtime/test/transform/split/intfor.c
new file mode 100644
index 0000000000000..8c70afd6f2594
--- /dev/null
+++ b/openmp/runtime/test/transform/split/intfor.c
@@ -0,0 +1,31 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+  printf("do\n");
+#pragma omp split counts(3, omp_fill, 2)
+  for (int i = 0; i < 10; ++i)
+    printf("i=%d\n", i);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK:      do
+// CHECK-NEXT: i=0
+// CHECK-NEXT: i=1
+// CHECK-NEXT: i=2
+// CHECK-NEXT: i=3
+// CHECK-NEXT: i=4
+// CHECK-NEXT: i=5
+// CHECK-NEXT: i=6
+// CHECK-NEXT: i=7
+// CHECK-NEXT: i=8
+// CHECK-NEXT: i=9
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/split/intfor_negstart.c b/openmp/runtime/test/transform/split/intfor_negstart.c
new file mode 100644
index 0000000000000..978c0a823d843
--- /dev/null
+++ b/openmp/runtime/test/transform/split/intfor_negstart.c
@@ -0,0 +1,32 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+  int n = 8;
+  printf("do\n");
+#pragma omp split counts(1, omp_fill, 1)
+  for (int i = -1; i <= n; ++i)
+    printf("i=%d\n", i);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK:      do
+// CHECK-NEXT: i=-1
+// CHECK-NEXT: i=0
+// CHECK-NEXT: i=1
+// CHECK-NEXT: i=2
+// CHECK-NEXT: i=3
+// CHECK-NEXT: i=4
+// CHECK-NEXT: i=5
+// CHECK-NEXT: i=6
+// CHECK-NEXT: i=7
+// CHECK-NEXT: i=8
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/split/iterfor.cpp b/openmp/runtime/test/transform/split/iterfor.cpp
new file mode 100644
index 0000000000000..86be9ecbdfee3
--- /dev/null
+++ b/openmp/runtime/test/transform/split/iterfor.cpp
@@ -0,0 +1,25 @@
+// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdio>
+
+int main() {
+  printf("do\n");
+#pragma omp split counts(1, omp_fill, 1)
+  for (int i = 7; i < 19; i += 3)
+    printf("i=%d\n", i);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK:      do
+// CHECK-NEXT: i=7
+// CHECK-NEXT: i=10
+// CHECK-NEXT: i=13
+// CHECK-NEXT: i=16
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/split/leq_bound.c b/openmp/runtime/test/transform/split/leq_bound.c
new file mode 100644
index 0000000000000..8ec7041aea42c
--- /dev/null
+++ b/openmp/runtime/test/transform/split/leq_bound.c
@@ -0,0 +1,27 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+  int n = 4;
+  printf("do\n");
+#pragma omp split counts(2, omp_fill)
+  for (int i = 0; i <= n; ++i)
+    printf("i=%d\n", i);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK:      do
+// CHECK-NEXT: i=0
+// CHECK-NEXT: i=1
+// CHECK-NEXT: i=2
+// CHECK-NEXT: i=3
+// CHECK-NEXT: i=4
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/split/parallel-split-intfor.c b/openmp/runtime/test/transform/split/parallel-split-intfor.c
new file mode 100644
index 0000000000000..ba0524b6ad77e
--- /dev/null
+++ b/openmp/runtime/test/transform/split/parallel-split-intfor.c
@@ -0,0 +1,32 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+  printf("do\n");
+#pragma omp parallel num_threads(1)
+  {
+#pragma omp split counts(2, omp_fill, 2)
+    for (int i = 0; i < 8; ++i)
+      printf("i=%d\n", i);
+  }
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK:      do
+// CHECK-NEXT: i=0
+// CHECK-NEXT: i=1
+// CHECK-NEXT: i=2
+// CHECK-NEXT: i=3
+// CHECK-NEXT: i=4
+// CHECK-NEXT: i=5
+// CHECK-NEXT: i=6
+// CHECK-NEXT: i=7
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/split/single_fill.c b/openmp/runtime/test/transform/split/single_fill.c
new file mode 100644
index 0000000000000..4b10ee7bcb566
--- /dev/null
+++ b/openmp/runtime/test/transform/split/single_fill.c
@@ -0,0 +1,28 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+  int n = 6;
+  printf("do\n");
+#pragma omp split counts(omp_fill)
+  for (int i = 0; i < n; ++i)
+    printf("i=%d\n", i);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK:      do
+// CHECK-NEXT: i=0
+// CHECK-NEXT: i=1
+// CHECK-NEXT: i=2
+// CHECK-NEXT: i=3
+// CHECK-NEXT: i=4
+// CHECK-NEXT: i=5
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/split/three_segments.c b/openmp/runtime/test/transform/split/three_segments.c
new file mode 100644
index 0000000000000..e06cdafc1d12a
--- /dev/null
+++ b/openmp/runtime/test/transform/split/three_segments.c
@@ -0,0 +1,31 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+  printf("do\n");
+#pragma omp split counts(2, 2, omp_fill)
+  for (int i = 0; i < 10; ++i)
+    printf("i=%d\n", i);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK:      do
+// CHECK-NEXT: i=0
+// CHECK-NEXT: i=1
+// CHECK-NEXT: i=2
+// CHECK-NEXT: i=3
+// CHECK-NEXT: i=4
+// CHECK-NEXT: i=5
+// CHECK-NEXT: i=6
+// CHECK-NEXT: i=7
+// CHECK-NEXT: i=8
+// CHECK-NEXT: i=9
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/split/trip_one.c b/openmp/runtime/test/transform/split/trip_one.c
new file mode 100644
index 0000000000000..5e520db697bc0
--- /dev/null
+++ b/openmp/runtime/test/transform/split/trip_one.c
@@ -0,0 +1,37 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+// Tiny trip counts: trip=1 with counts(1, omp_fill) and trip=0.
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+  int n;
+
+  n = 1;
+  printf("trip1\n");
+#pragma omp split counts(1, omp_fill)
+  for (int i = 0; i < n; ++i)
+    printf("i=%d\n", i);
+  printf("end1\n");
+
+  n = 0;
+  printf("trip0\n");
+#pragma omp split counts(omp_fill)
+  for (int i = 0; i < n; ++i)
+    printf("i=%d\n", i);
+  printf("end0\n");
+
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK:      trip1
+// CHECK-NEXT: i=0
+// CHECK-NEXT: end1
+// CHECK-NEXT: trip0
+// CHECK-NEXT: end0
diff --git a/openmp/runtime/test/transform/split/unsigned_iv.c b/openmp/runtime/test/transform/split/unsigned_iv.c
new file mode 100644
index 0000000000000..7d2ef434edfec
--- /dev/null
+++ b/openmp/runtime/test/transform/split/unsigned_iv.c
@@ -0,0 +1,29 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+  printf("do\n");
+#pragma omp split counts(3, omp_fill)
+  for (unsigned i = 0; i < 8; ++i)
+    printf("i=%u\n", i);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK:      do
+// CHECK-NEXT: i=0
+// CHECK-NEXT: i=1
+// CHECK-NEXT: i=2
+// CHECK-NEXT: i=3
+// CHECK-NEXT: i=4
+// CHECK-NEXT: i=5
+// CHECK-NEXT: i=6
+// CHECK-NEXT: i=7
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/split/zero_first_segment.c b/openmp/runtime/test/transform/split/zero_first_segment.c
new file mode 100644
index 0000000000000..bf657fa2e276c
--- /dev/null
+++ b/openmp/runtime/test/transform/split/zero_first_segment.c
@@ -0,0 +1,26 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+  printf("do\n");
+#pragma omp split counts(0, omp_fill)
+  for (int i = 0; i < 5; ++i)
+    printf("i=%d\n", i);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK:      do
+// CHECK-NEXT: i=0
+// CHECK-NEXT: i=1
+// CHECK-NEXT: i=2
+// CHECK-NEXT: i=3
+// CHECK-NEXT: i=4
+// CHECK-NEXT: done

>From a81de1a68741a3f1f39101a2cd57c0dc94d77870 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Tue, 31 Mar 2026 07:35:12 -0400
Subject: [PATCH 29/32] revised

---
 clang/include/clang/AST/OpenMPClause.h        | 15 ++++++-----
 clang/include/clang/Sema/SemaOpenMP.h         | 11 ++++----
 clang/lib/AST/OpenMPClause.cpp                | 19 +++++++-------
 clang/lib/Parse/ParseOpenMP.cpp               |  2 +-
 clang/lib/Sema/SemaOpenMP.cpp                 | 26 ++++++++++---------
 clang/lib/Sema/TreeTransform.h                |  5 ++--
 clang/lib/Serialization/ASTReader.cpp         |  4 ++-
 clang/lib/Serialization/ASTWriter.cpp         |  4 ++-
 .../ASTMatchers/ASTMatchersNodeTest.cpp       |  2 +-
 9 files changed, 47 insertions(+), 41 deletions(-)

diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index d330f7042fc95..be7f374d357b4 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -1042,8 +1042,8 @@ class OMPCountsClause final
   /// Number of count expressions in the clause.
   unsigned NumCounts;
 
-  /// 0-based index of the omp_fill list item, or UINT_MAX if absent.
-  unsigned OmpFillIndex;
+  /// 0-based index of the omp_fill list item; std::nullopt if absent.
+  std::optional<unsigned> OmpFillIndex;
 
   /// Source location of the omp_fill keyword.
   SourceLocation OmpFillLoc;
@@ -1051,7 +1051,7 @@ class OMPCountsClause final
   /// Build an empty clause.
   explicit OMPCountsClause(int NumCounts)
       : OMPClause(llvm::omp::OMPC_counts, SourceLocation(), SourceLocation()),
-        NumCounts(NumCounts), OmpFillIndex(UINT_MAX) {}
+        NumCounts(NumCounts) {}
 
 public:
   /// Build a 'counts' AST node.
@@ -1064,7 +1064,8 @@ class OMPCountsClause final
   static OMPCountsClause *Create(const ASTContext &C, SourceLocation StartLoc,
                                  SourceLocation LParenLoc,
                                  SourceLocation EndLoc, ArrayRef<Expr *> Counts,
-                                 unsigned FillIdx, SourceLocation FillLoc);
+                                 std::optional<unsigned> FillIdx,
+                                 SourceLocation FillLoc);
 
   /// Build an empty 'counts' AST node for deserialization.
   ///
@@ -1081,10 +1082,10 @@ class OMPCountsClause final
   /// Returns the number of list items.
   unsigned getNumCounts() const { return NumCounts; }
 
-  unsigned getOmpFillIndex() const { return OmpFillIndex; }
+  std::optional<unsigned> getOmpFillIndex() const { return OmpFillIndex; }
   SourceLocation getOmpFillLoc() const { return OmpFillLoc; }
-  bool hasOmpFill() const { return OmpFillIndex != UINT_MAX; }
-  void setOmpFillIndex(unsigned Idx) { OmpFillIndex = Idx; }
+  bool hasOmpFill() const { return OmpFillIndex.has_value(); }
+  void setOmpFillIndex(std::optional<unsigned> Idx) { OmpFillIndex = Idx; }
   void setOmpFillLoc(SourceLocation Loc) { OmpFillLoc = Loc; }
 
   /// Returns the count expressions.
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index 38969abd786b3..3621ce96b8724 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -918,12 +918,11 @@ class SemaOpenMP : public SemaBase {
                                     SourceLocation LParenLoc,
                                     SourceLocation EndLoc);
   /// Called on well-formed 'counts' clause after parsing its arguments.
-  OMPClause *ActOnOpenMPCountsClause(ArrayRef<Expr *> CountExprs,
-                                     SourceLocation StartLoc,
-                                     SourceLocation LParenLoc,
-                                     SourceLocation EndLoc, unsigned FillIdx,
-                                     SourceLocation FillLoc,
-                                     unsigned FillCount);
+  OMPClause *
+  ActOnOpenMPCountsClause(ArrayRef<Expr *> CountExprs, SourceLocation StartLoc,
+                          SourceLocation LParenLoc, SourceLocation EndLoc,
+                          std::optional<unsigned> FillIdx,
+                          SourceLocation FillLoc, unsigned FillCount);
   /// Called on well-form 'permutation' clause after parsing its arguments.
   OMPClause *ActOnOpenMPPermutationClause(ArrayRef<Expr *> PermExprs,
                                           SourceLocation StartLoc,
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 607f73d41f98f..4853b17b90ba5 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -20,6 +20,7 @@
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
@@ -987,11 +988,10 @@ OMPSizesClause *OMPSizesClause::CreateEmpty(const ASTContext &C,
   return new (Mem) OMPSizesClause(NumSizes);
 }
 
-OMPCountsClause *
-OMPCountsClause::Create(const ASTContext &C, SourceLocation StartLoc,
-                        SourceLocation LParenLoc, SourceLocation EndLoc,
-                        ArrayRef<Expr *> Counts, unsigned FillIdx,
-                        SourceLocation FillLoc) {
+OMPCountsClause *OMPCountsClause::Create(
+    const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
+    SourceLocation EndLoc, ArrayRef<Expr *> Counts,
+    std::optional<unsigned> FillIdx, SourceLocation FillLoc) {
   OMPCountsClause *Clause = CreateEmpty(C, Counts.size());
   Clause->setLocStart(StartLoc);
   Clause->setLParenLoc(LParenLoc);
@@ -2008,16 +2008,15 @@ void OMPClausePrinter::VisitOMPSizesClause(OMPSizesClause *Node) {
 
 void OMPClausePrinter::VisitOMPCountsClause(OMPCountsClause *Node) {
   OS << "counts(";
-  unsigned FillIdx = Node->getOmpFillIndex();
+  std::optional<unsigned> FillIdx = Node->getOmpFillIndex();
   ArrayRef<Expr *> Refs = Node->getCountsRefs();
-  for (unsigned I = 0, N = Refs.size(); I < N; ++I) {
+  for (auto I : llvm::seq<unsigned>(Refs.size())) {
     if (I)
       OS << ", ";
-    if (I == FillIdx) {
+    if (FillIdx && I == *FillIdx)
       OS << "omp_fill";
-    } else {
+    else
       Refs[I]->printPretty(OS, nullptr, Policy, 0);
-    }
   }
   OS << ")";
 }
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index d529139a1907c..0e92c3fa1b572 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -2994,7 +2994,7 @@ OMPClause *Parser::ParseOpenMPSizesClause() {
 OMPClause *Parser::ParseOpenMPCountsClause() {
   SourceLocation ClauseNameLoc, OpenLoc, CloseLoc;
   SmallVector<Expr *, 4> ValExprs;
-  unsigned FillIdx = UINT_MAX;
+  std::optional<unsigned> FillIdx;
   unsigned FillCount = 0;
   SourceLocation FillLoc;
 
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 371ad3faaeff8..51d6656926811 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -15980,16 +15980,15 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
   // Internal variable names.
   std::string OrigVarName = OrigVar->getNameInfo().getAsString();
 
-  unsigned FillIdx = CountsClause->getOmpFillIndex();
-  if (!CountsClause->hasOmpFill()) {
+  std::optional<unsigned> FillIdx = CountsClause->getOmpFillIndex();
+  if (!FillIdx)
     return StmtError();
-  }
 
   unsigned NumItems = CountsClause->getNumCounts();
   SmallVector<uint64_t, 4> CountValues(NumItems, 0);
   ArrayRef<Expr *> Refs = CountsClause->getCountsRefs();
   for (unsigned I = 0; I < NumItems; ++I) {
-    if (I == FillIdx)
+    if (I == *FillIdx)
       continue;
     Expr *CountExpr = Refs[I];
     if (!CountExpr)
@@ -16006,7 +16005,7 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
   Expr *NumIterExpr = LoopHelper.NumIterations;
 
   uint64_t RightSum = 0;
-  for (unsigned I = FillIdx + 1; I < NumItems; ++I)
+  for (unsigned I = *FillIdx + 1; I < NumItems; ++I)
     RightSum += CountValues[I];
 
   auto MakeIntLit = [&](uint64_t Val) {
@@ -16024,11 +16023,11 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
     Expr *StartExpr = nullptr;
     Expr *EndExpr = nullptr;
 
-    if (Seg < FillIdx) {
+    if (Seg < *FillIdx) {
       StartExpr = MakeIntLit(LeftAccum);
       LeftAccum += CountValues[Seg];
       EndExpr = MakeIntLit(LeftAccum);
-    } else if (Seg == FillIdx) {
+    } else if (Seg == *FillIdx) {
       StartExpr = MakeIntLit(LeftAccum);
       if (RightRemaining == 0) {
         EndExpr = NumIterExpr;
@@ -18089,15 +18088,18 @@ OMPClause *SemaOpenMP::ActOnOpenMPSizesClause(ArrayRef<Expr *> SizeExprs,
                                 SanitizedSizeExprs);
 }
 
-OMPClause *SemaOpenMP::ActOnOpenMPCountsClause(
-    ArrayRef<Expr *> CountExprs, SourceLocation StartLoc,
-    SourceLocation LParenLoc, SourceLocation EndLoc, unsigned FillIdx,
-    SourceLocation FillLoc, unsigned FillCount) {
+OMPClause *SemaOpenMP::ActOnOpenMPCountsClause(ArrayRef<Expr *> CountExprs,
+                                               SourceLocation StartLoc,
+                                               SourceLocation LParenLoc,
+                                               SourceLocation EndLoc,
+                                               std::optional<unsigned> FillIdx,
+                                               SourceLocation FillLoc,
+                                               unsigned FillCount) {
   SmallVector<Expr *> SanitizedCountExprs(CountExprs);
 
   for (unsigned I = 0; I < SanitizedCountExprs.size(); ++I) {
     Expr *&CountExpr = SanitizedCountExprs[I];
-    if (I == FillIdx)
+    if (FillIdx && I == *FillIdx)
       continue;
     if (!CountExpr)
       continue;
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 1192a207b56bd..9c6b4ecfe58b3 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1772,9 +1772,10 @@ class TreeTransform {
   OMPClause *RebuildOMPCountsClause(ArrayRef<Expr *> Counts,
                                     SourceLocation StartLoc,
                                     SourceLocation LParenLoc,
-                                    SourceLocation EndLoc, unsigned FillIdx,
+                                    SourceLocation EndLoc,
+                                    std::optional<unsigned> FillIdx,
                                     SourceLocation FillLoc) {
-    unsigned FillCount = (FillIdx != UINT_MAX) ? 1 : 0;
+    unsigned FillCount = FillIdx ? 1 : 0;
     return getSema().OpenMP().ActOnOpenMPCountsClause(
         Counts, StartLoc, LParenLoc, EndLoc, FillIdx, FillLoc, FillCount);
   }
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index c456474b66926..73cf7b0972b4d 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -11856,7 +11856,9 @@ void OMPClauseReader::VisitOMPSizesClause(OMPSizesClause *C) {
 }
 
 void OMPClauseReader::VisitOMPCountsClause(OMPCountsClause *C) {
-  C->setOmpFillIndex(Record.readInt());
+  bool HasFill = Record.readBool();
+  if (HasFill)
+    C->setOmpFillIndex(Record.readInt());
   C->setOmpFillLoc(Record.readSourceLocation());
   for (Expr *&E : C->getCountsRefs())
     E = Record.readSubExpr();
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 42e054bec7e18..da691798fc8c0 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -8072,7 +8072,9 @@ void OMPClauseWriter::VisitOMPSizesClause(OMPSizesClause *C) {
 
 void OMPClauseWriter::VisitOMPCountsClause(OMPCountsClause *C) {
   Record.push_back(C->getNumCounts());
-  Record.push_back(C->getOmpFillIndex());
+  Record.push_back(C->hasOmpFill());
+  if (C->hasOmpFill())
+    Record.push_back(*C->getOmpFillIndex());
   Record.AddSourceLocation(C->getOmpFillLoc());
   for (Expr *Count : C->getCountsRefs())
     Record.AddStmt(Count);
diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
index f725784361bf6..4190d4703e37d 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
@@ -3161,7 +3161,7 @@ void f() {
   ASSERT_TRUE(Counts);
   ASSERT_EQ(Counts->getNumCounts(), 2u);
   EXPECT_TRUE(Counts->hasOmpFill());
-  EXPECT_EQ(Counts->getOmpFillIndex(), 1u);
+  EXPECT_EQ(*Counts->getOmpFillIndex(), 1u);
   EXPECT_FALSE(Counts->getCountsRefs()[1]);
 }
 

>From 3276c4f15a2645d616c62bce351fcee278ea94fa Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Tue, 31 Mar 2026 08:09:37 -0400
Subject: [PATCH 30/32] format

---
 clang/lib/Sema/SemaOpenMP.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 51d6656926811..94e552d7b8921 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -16095,8 +16095,8 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
     if (!IncrExpr.isUsable())
       return StmtError();
 
-    ExprResult IVAssign = SemaRef.BuildBinOp(
-        CurScope, OrigVarLoc, BO_Assign, IterationVarRef, MakeIVRef());
+    ExprResult IVAssign = SemaRef.BuildBinOp(CurScope, OrigVarLoc, BO_Assign,
+                                             IterationVarRef, MakeIVRef());
     if (!IVAssign.isUsable())
       return StmtError();
 

>From afecea11d6e5802903b42613de5d88c8a2ac21ed Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Tue, 31 Mar 2026 11:33:12 -0400
Subject: [PATCH 31/32] assert-fix

---
 clang/lib/Sema/SemaOpenMP.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 94e552d7b8921..01b83394e8a3e 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -6467,14 +6467,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
            "reverse directive does not support any clauses");
     Res = ActOnOpenMPReverseDirective(AStmt, StartLoc, EndLoc);
     break;
-  case OMPD_split: {
-    assert(OMPExecutableDirective::getSingleClause<OMPCountsClause>(
-               ClausesWithImplicit) &&
-           "split directive requires counts clause");
+  case OMPD_split:
     Res =
         ActOnOpenMPSplitDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc);
     break;
-  }
   case OMPD_interchange:
     Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc,
                                           EndLoc);

>From 155a6c881e8139284219b8df8f67abd72e037438 Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Wed, 1 Apr 2026 06:22:42 -0400
Subject: [PATCH 32/32] revised

---
 clang/include/clang/AST/OpenMPClause.h        |  24 +-
 clang/lib/AST/OpenMPClause.cpp                |   6 +-
 clang/lib/Sema/SemaOpenMP.cpp                 |  19 +-
 clang/test/OpenMP/split_codegen.cpp           | 232 ++++++++++--------
 clang/test/OpenMP/split_composition.cpp       |   4 +-
 .../test/OpenMP/split_compound_associated.cpp |   4 +-
 clang/test/OpenMP/split_counts_verify.c       |  13 +-
 .../OpenMP/split_distribute_inner_split.cpp   |   4 +-
 clang/test/OpenMP/split_loop_styles.cpp       |   4 +-
 clang/test/OpenMP/split_opts_simd_debug.cpp   |   2 +-
 clang/test/OpenMP/split_parallel_split.cpp    |   4 +-
 .../runtime/test/transform/split/fill_first.c |   5 -
 .../runtime/test/transform/split/foreach.cpp  |   5 -
 openmp/runtime/test/transform/split/intfor.c  |   5 -
 .../test/transform/split/intfor_negstart.c    |   5 -
 .../runtime/test/transform/split/iterfor.cpp  | 142 +++++++++--
 .../runtime/test/transform/split/leq_bound.c  |   5 -
 .../test/transform/split/negative_incr.c      |  22 ++
 .../test/transform/split/nonconstant_count.c  |  27 ++
 .../test/transform/split/nonconstant_incr.c   |  22 ++
 .../transform/split/parallel-split-intfor.c   |   5 -
 .../test/transform/split/single_fill.c        |   5 -
 .../test/transform/split/three_segments.c     |   5 -
 .../runtime/test/transform/split/trip_one.c   |   5 -
 .../test/transform/split/unsigned_iv.c        |   5 -
 .../test/transform/split/zero_first_segment.c |   5 -
 26 files changed, 371 insertions(+), 213 deletions(-)
 create mode 100644 openmp/runtime/test/transform/split/negative_incr.c
 create mode 100644 openmp/runtime/test/transform/split/nonconstant_count.c
 create mode 100644 openmp/runtime/test/transform/split/nonconstant_incr.c

diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index be7f374d357b4..ccf2c40bc5efa 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -1040,7 +1040,7 @@ class OMPCountsClause final
   SourceLocation LParenLoc;
 
   /// Number of count expressions in the clause.
-  unsigned NumCounts;
+  unsigned NumCounts = 0;
 
   /// 0-based index of the omp_fill list item.
   std::optional<unsigned> OmpFillIndex;
@@ -1053,6 +1053,17 @@ class OMPCountsClause final
       : OMPClause(llvm::omp::OMPC_counts, SourceLocation(), SourceLocation()),
         NumCounts(NumCounts) {}
 
+  /// Sets the location of '('.
+  void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+  void setOmpFillIndex(std::optional<unsigned> Idx) { OmpFillIndex = Idx; }
+  void setOmpFillLoc(SourceLocation Loc) { OmpFillLoc = Loc; }
+
+  /// Sets the count expressions.
+  void setCountsRefs(ArrayRef<Expr *> VL) {
+    assert(VL.size() == NumCounts);
+    llvm::copy(VL, getCountsRefs().begin());
+  }
+
 public:
   /// Build a 'counts' AST node.
   ///
@@ -1073,9 +1084,6 @@ class OMPCountsClause final
   /// \param NumCounts   Number of items in the clause.
   static OMPCountsClause *CreateEmpty(const ASTContext &C, unsigned NumCounts);
 
-  /// Sets the location of '('.
-  void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
-
   /// Returns the location of '('.
   SourceLocation getLParenLoc() const { return LParenLoc; }
 
@@ -1085,8 +1093,6 @@ class OMPCountsClause final
   std::optional<unsigned> getOmpFillIndex() const { return OmpFillIndex; }
   SourceLocation getOmpFillLoc() const { return OmpFillLoc; }
   bool hasOmpFill() const { return OmpFillIndex.has_value(); }
-  void setOmpFillIndex(std::optional<unsigned> Idx) { OmpFillIndex = Idx; }
-  void setOmpFillLoc(SourceLocation Loc) { OmpFillLoc = Loc; }
 
   /// Returns the count expressions.
   MutableArrayRef<Expr *> getCountsRefs() {
@@ -1096,12 +1102,6 @@ class OMPCountsClause final
     return getTrailingObjects(NumCounts);
   }
 
-  /// Sets the count expressions.
-  void setCountsRefs(ArrayRef<Expr *> VL) {
-    assert(VL.size() == NumCounts);
-    llvm::copy(VL, getCountsRefs().begin());
-  }
-
   child_range children() {
     MutableArrayRef<Expr *> Counts = getCountsRefs();
     return child_range(reinterpret_cast<Stmt **>(Counts.begin()),
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 4853b17b90ba5..3a35e17aff40b 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -2010,14 +2010,12 @@ void OMPClausePrinter::VisitOMPCountsClause(OMPCountsClause *Node) {
   OS << "counts(";
   std::optional<unsigned> FillIdx = Node->getOmpFillIndex();
   ArrayRef<Expr *> Refs = Node->getCountsRefs();
-  for (auto I : llvm::seq<unsigned>(Refs.size())) {
-    if (I)
-      OS << ", ";
+  llvm::interleaveComma(llvm::seq<unsigned>(Refs.size()), OS, [&](unsigned I) {
     if (FillIdx && I == *FillIdx)
       OS << "omp_fill";
     else
       Refs[I]->printPretty(OS, nullptr, Policy, 0);
-  }
+  });
   OS << ")";
 }
 
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 01b83394e8a3e..e45a4e7fcba10 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -15927,7 +15927,7 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
   if (!AStmt)
     return StmtError();
 
-  const OMPCountsClause *CountsClause =
+  const auto *CountsClause =
       OMPExecutableDirective::getSingleClause<OMPCountsClause>(Clauses);
   if (!CountsClause)
     return StmtError();
@@ -15976,15 +15976,15 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
   // Internal variable names.
   std::string OrigVarName = OrigVar->getNameInfo().getAsString();
 
-  std::optional<unsigned> FillIdx = CountsClause->getOmpFillIndex();
-  if (!FillIdx)
+  if (!CountsClause->hasOmpFill())
     return StmtError();
+  unsigned FillIdx = *CountsClause->getOmpFillIndex();
 
   unsigned NumItems = CountsClause->getNumCounts();
   SmallVector<uint64_t, 4> CountValues(NumItems, 0);
   ArrayRef<Expr *> Refs = CountsClause->getCountsRefs();
   for (unsigned I = 0; I < NumItems; ++I) {
-    if (I == *FillIdx)
+    if (I == FillIdx)
       continue;
     Expr *CountExpr = Refs[I];
     if (!CountExpr)
@@ -16001,7 +16001,7 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
   Expr *NumIterExpr = LoopHelper.NumIterations;
 
   uint64_t RightSum = 0;
-  for (unsigned I = *FillIdx + 1; I < NumItems; ++I)
+  for (unsigned I = FillIdx + 1; I < NumItems; ++I)
     RightSum += CountValues[I];
 
   auto MakeIntLit = [&](uint64_t Val) {
@@ -16012,6 +16012,11 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
   size_t NumSegments = NumItems;
   SmallVector<Stmt *, 4> SplitLoops;
 
+  auto *IterVarDecl = cast<VarDecl>(IterationVarRef->getDecl());
+  SplitLoops.push_back(new (Context) DeclStmt(DeclGroupRef(IterVarDecl),
+                                              IterationVarRef->getBeginLoc(),
+                                              IterationVarRef->getEndLoc()));
+
   uint64_t LeftAccum = 0;
   uint64_t RightRemaining = RightSum;
 
@@ -16019,11 +16024,11 @@ StmtResult SemaOpenMP::ActOnOpenMPSplitDirective(ArrayRef<OMPClause *> Clauses,
     Expr *StartExpr = nullptr;
     Expr *EndExpr = nullptr;
 
-    if (Seg < *FillIdx) {
+    if (Seg < FillIdx) {
       StartExpr = MakeIntLit(LeftAccum);
       LeftAccum += CountValues[Seg];
       EndExpr = MakeIntLit(LeftAccum);
-    } else if (Seg == *FillIdx) {
+    } else if (Seg == FillIdx) {
       StartExpr = MakeIntLit(LeftAccum);
       if (RightRemaining == 0) {
         EndExpr = NumIterExpr;
diff --git a/clang/test/OpenMP/split_codegen.cpp b/clang/test/OpenMP/split_codegen.cpp
index 257090de0fdfa..9c739c013c2fc 100644
--- a/clang/test/OpenMP/split_codegen.cpp
+++ b/clang/test/OpenMP/split_codegen.cpp
@@ -116,6 +116,7 @@ void split_range_for() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[I2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
@@ -132,8 +133,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK1:       for.body:
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP1]], ptr @_ZZN1SC1EvE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr @_ZZN1SC1EvE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2:![0-9]+]], !align [[META3:![0-9]+]]
@@ -156,8 +157,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]]
 // CHECK1:       for.body6:
 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP8]], ptr @_ZZN1SC1EvE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr @_ZZN1SC1EvE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL7:%.*]] = mul nsw i32 [[TMP9]], 1
 // CHECK1-NEXT:    [[ADD8:%.*]] = add nsw i32 0, [[MUL7]]
 // CHECK1-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2]], !align [[META3]]
@@ -179,6 +180,7 @@ void split_range_for() {
 // CHECK1-SAME: ) #[[ATTR0]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
@@ -190,8 +192,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK1:       for.body:
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP1]], ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -212,8 +214,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END8:%.*]]
 // CHECK1:       for.body3:
 // CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP6]], ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL4:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK1-NEXT:    [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
 // CHECK1-NEXT:    store i32 [[ADD5]], ptr [[I]], align 4
@@ -236,6 +238,7 @@ void split_range_for() {
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
@@ -255,8 +258,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK1:       for.body:
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP3]], ptr @_ZZ14split_var_tripE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ14split_var_tripE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -279,8 +282,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]]
 // CHECK1:       for.body6:
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP9]], ptr @_ZZ14split_var_tripE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ14split_var_tripE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL7:%.*]] = mul nsw i32 [[TMP10]], 1
 // CHECK1-NEXT:    [[ADD8:%.*]] = add nsw i32 0, [[MUL7]]
 // CHECK1-NEXT:    store i32 [[ADD8]], ptr [[I]], align 4
@@ -303,6 +306,7 @@ void split_range_for() {
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4
@@ -323,8 +327,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK1:       for.body:
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP3]], ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -348,8 +352,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP6]], label [[FOR_BODY7:%.*]], label [[FOR_END12:%.*]]
 // CHECK1:       for.body7:
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP9]], ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL8:%.*]] = mul nsw i32 [[TMP10]], 1
 // CHECK1-NEXT:    [[ADD9:%.*]] = add nsw i32 0, [[MUL8]]
 // CHECK1-NEXT:    store i32 [[ADD9]], ptr [[I]], align 4
@@ -375,8 +379,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END23:%.*]]
 // CHECK1:       for.body18:
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP16]], ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL19:%.*]] = mul nsw i32 [[TMP17]], 1
 // CHECK1-NEXT:    [[ADD20:%.*]] = add nsw i32 0, [[MUL19]]
 // CHECK1-NEXT:    store i32 [[ADD20]], ptr [[I]], align 4
@@ -399,6 +403,7 @@ void split_range_for() {
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
@@ -421,8 +426,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK1:       for.body:
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP4]], ptr @_ZZ16split_first_fillE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr @_ZZ16split_first_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1
 // CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT:    store i32 [[ADD4]], ptr [[I]], align 4
@@ -448,8 +453,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]]
 // CHECK1:       for.body10:
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP11]], ptr @_ZZ16split_first_fillE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr @_ZZ16split_first_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL11:%.*]] = mul nsw i32 [[TMP12]], 1
 // CHECK1-NEXT:    [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
 // CHECK1-NEXT:    store i32 [[ADD12]], ptr [[I]], align 4
@@ -472,6 +477,7 @@ void split_range_for() {
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
@@ -492,8 +498,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK1:       for.body:
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP4]], ptr @_ZZ15split_only_fillE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr @_ZZ15split_only_fillE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1
 // CHECK1-NEXT:    [[ADD3:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT:    store i32 [[ADD3]], ptr [[I]], align 4
@@ -516,6 +522,7 @@ void split_range_for() {
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4
@@ -536,8 +543,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK1:       for.body:
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP3]], ptr @_ZZ15split_neg_startE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL:%.*]] = mul i32 [[TMP4]], 1
 // CHECK1-NEXT:    [[ADD:%.*]] = add i32 -1, [[MUL]]
 // CHECK1-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -561,8 +568,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP6]], label [[FOR_BODY7:%.*]], label [[FOR_END12:%.*]]
 // CHECK1:       for.body7:
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP9]], ptr @_ZZ15split_neg_startE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL8:%.*]] = mul i32 [[TMP10]], 1
 // CHECK1-NEXT:    [[ADD9:%.*]] = add i32 -1, [[MUL8]]
 // CHECK1-NEXT:    store i32 [[ADD9]], ptr [[I]], align 4
@@ -588,8 +595,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END23:%.*]]
 // CHECK1:       for.body18:
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP16]], ptr @_ZZ15split_neg_startE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL19:%.*]] = mul i32 [[TMP17]], 1
 // CHECK1-NEXT:    [[ADD20:%.*]] = add i32 -1, [[MUL19]]
 // CHECK1-NEXT:    store i32 [[ADD20]], ptr [[I]], align 4
@@ -609,6 +616,7 @@ void split_range_for() {
 // CHECK1-SAME: ) #[[ATTR0]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
@@ -620,8 +628,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK1:       for.body:
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP1]], ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -642,8 +650,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END8:%.*]]
 // CHECK1:       for.body3:
 // CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP6]], ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL4:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK1-NEXT:    [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
 // CHECK1-NEXT:    store i32 [[ADD5]], ptr [[I]], align 4
@@ -666,6 +674,7 @@ void split_range_for() {
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4
@@ -686,8 +695,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK1:       for.body:
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP3]], ptr @_ZZ17split_three_constE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -708,8 +717,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP4]], label [[FOR_BODY5:%.*]], label [[FOR_END10:%.*]]
 // CHECK1:       for.body5:
 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP8]], ptr @_ZZ17split_three_constE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL6:%.*]] = mul nsw i32 [[TMP9]], 1
 // CHECK1-NEXT:    [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
 // CHECK1-NEXT:    store i32 [[ADD7]], ptr [[I]], align 4
@@ -732,8 +741,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END19:%.*]]
 // CHECK1:       for.body14:
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP14]], ptr @_ZZ17split_three_constE7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL15:%.*]] = mul nsw i32 [[TMP15]], 1
 // CHECK1-NEXT:    [[ADD16:%.*]] = add nsw i32 0, [[MUL15]]
 // CHECK1-NEXT:    store i32 [[ADD16]], ptr [[I]], align 4
@@ -756,6 +765,7 @@ void split_range_for() {
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
@@ -775,8 +785,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK1:       for.body:
 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP3]], ptr @_ZZ11split_step2E7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ11split_step2E7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL:%.*]] = mul i32 [[TMP4]], 2
 // CHECK1-NEXT:    [[ADD:%.*]] = add i32 0, [[MUL]]
 // CHECK1-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -799,8 +809,8 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]]
 // CHECK1:       for.body6:
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP9]], ptr @_ZZ11split_step2E7.omp.iv, align 4
-// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ11split_step2E7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL7:%.*]] = mul i32 [[TMP10]], 2
 // CHECK1-NEXT:    [[ADD8:%.*]] = add i32 0, [[MUL7]]
 // CHECK1-NEXT:    store i32 [[ADD8]], ptr [[I]], align 4
@@ -823,6 +833,7 @@ void split_range_for() {
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
@@ -846,9 +857,9 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK1:       for.body:
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP5]], ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK1-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP6]], [[MUL]]
 // CHECK1-NEXT:    store i32 [[SUB4]], ptr [[I]], align 4
@@ -874,9 +885,9 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]]
 // CHECK1:       for.body10:
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK1-NEXT:    store i32 [[TMP13]], ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK1-NEXT:    store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[MUL11:%.*]] = mul nsw i32 [[TMP15]], 1
 // CHECK1-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP14]], [[MUL11]]
 // CHECK1-NEXT:    store i32 [[SUB12]], ptr [[I]], align 4
@@ -902,6 +913,7 @@ void split_range_for() {
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTSPLIT_IV_0___BEGIN1:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[X:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTSPLIT_IV_1___BEGIN1:%.*]] = alloca i64, align 8
@@ -938,9 +950,9 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK1:       for.body:
 // CHECK1-NEXT:    [[TMP7:%.*]] = load i64, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8
-// CHECK1-NEXT:    store i64 [[TMP7]], ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK1-NEXT:    store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8
 // CHECK1-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8
-// CHECK1-NEXT:    [[TMP9:%.*]] = load i64, ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP9]], 1
 // CHECK1-NEXT:    [[ADD_PTR6:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[MUL]]
 // CHECK1-NEXT:    store ptr [[ADD_PTR6]], ptr [[__BEGIN1]], align 8
@@ -966,9 +978,9 @@ void split_range_for() {
 // CHECK1-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]]
 // CHECK1:       for.body10:
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
-// CHECK1-NEXT:    store i64 [[TMP16]], ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK1-NEXT:    store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8
 // CHECK1-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8
-// CHECK1-NEXT:    [[TMP18:%.*]] = load i64, ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK1-NEXT:    [[MUL11:%.*]] = mul nsw i64 [[TMP18]], 1
 // CHECK1-NEXT:    [[ADD_PTR12:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[MUL11]]
 // CHECK1-NEXT:    store ptr [[ADD_PTR12]], ptr [[__BEGIN1]], align 8
@@ -1016,6 +1028,7 @@ void split_range_for() {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT:    [[I2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
@@ -1032,8 +1045,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK2:       for.body:
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP1]], ptr @_ZZN1SC1EvE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr @_ZZN1SC1EvE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK2-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2:![0-9]+]], !align [[META3:![0-9]+]]
@@ -1056,8 +1069,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]]
 // CHECK2:       for.body6:
 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP8]], ptr @_ZZN1SC1EvE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr @_ZZN1SC1EvE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL7:%.*]] = mul nsw i32 [[TMP9]], 1
 // CHECK2-NEXT:    [[ADD8:%.*]] = add nsw i32 0, [[MUL7]]
 // CHECK2-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[I2]], align 8, !nonnull [[META2]], !align [[META3]]
@@ -1091,6 +1104,7 @@ void split_range_for() {
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[DOTSPLIT_IV_0___BEGIN1:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[X:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_1___BEGIN1:%.*]] = alloca i64, align 8
@@ -1127,9 +1141,9 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK2:       for.body:
 // CHECK2-NEXT:    [[TMP7:%.*]] = load i64, ptr [[DOTSPLIT_IV_0___BEGIN1]], align 8
-// CHECK2-NEXT:    store i64 [[TMP7]], ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK2-NEXT:    store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8
 // CHECK2-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8
-// CHECK2-NEXT:    [[TMP9:%.*]] = load i64, ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP9]], 1
 // CHECK2-NEXT:    [[ADD_PTR6:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[MUL]]
 // CHECK2-NEXT:    store ptr [[ADD_PTR6]], ptr [[__BEGIN1]], align 8
@@ -1155,9 +1169,9 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]]
 // CHECK2:       for.body10:
 // CHECK2-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTSPLIT_IV_1___BEGIN1]], align 8
-// CHECK2-NEXT:    store i64 [[TMP16]], ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK2-NEXT:    store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8
 // CHECK2-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8
-// CHECK2-NEXT:    [[TMP18:%.*]] = load i64, ptr @_ZZ15split_range_forvE7.omp.iv, align 8
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK2-NEXT:    [[MUL11:%.*]] = mul nsw i64 [[TMP18]], 1
 // CHECK2-NEXT:    [[ADD_PTR12:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[MUL11]]
 // CHECK2-NEXT:    store ptr [[ADD_PTR12]], ptr [[__BEGIN1]], align 8
@@ -1183,6 +1197,7 @@ void split_range_for() {
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
@@ -1206,9 +1221,9 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK2:       for.body:
 // CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP5]], ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK2-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP6]], [[MUL]]
 // CHECK2-NEXT:    store i32 [[SUB4]], ptr [[I]], align 4
@@ -1234,9 +1249,9 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]]
 // CHECK2:       for.body10:
 // CHECK2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP13]], ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr @_ZZ15split_decrementE7.omp.iv, align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL11:%.*]] = mul nsw i32 [[TMP15]], 1
 // CHECK2-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP14]], [[MUL11]]
 // CHECK2-NEXT:    store i32 [[SUB12]], ptr [[I]], align 4
@@ -1259,6 +1274,7 @@ void split_range_for() {
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
@@ -1281,8 +1297,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK2:       for.body:
 // CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP4]], ptr @_ZZ16split_first_fillE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr @_ZZ16split_first_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1
 // CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK2-NEXT:    store i32 [[ADD4]], ptr [[I]], align 4
@@ -1308,8 +1324,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END15:%.*]]
 // CHECK2:       for.body10:
 // CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP11]], ptr @_ZZ16split_first_fillE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr @_ZZ16split_first_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL11:%.*]] = mul nsw i32 [[TMP12]], 1
 // CHECK2-NEXT:    [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
 // CHECK2-NEXT:    store i32 [[ADD12]], ptr [[I]], align 4
@@ -1332,6 +1348,7 @@ void split_range_for() {
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4
@@ -1352,8 +1369,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK2:       for.body:
 // CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP3]], ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK2-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -1377,8 +1394,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP6]], label [[FOR_BODY7:%.*]], label [[FOR_END12:%.*]]
 // CHECK2:       for.body7:
 // CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP9]], ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL8:%.*]] = mul nsw i32 [[TMP10]], 1
 // CHECK2-NEXT:    [[ADD9:%.*]] = add nsw i32 0, [[MUL8]]
 // CHECK2-NEXT:    store i32 [[ADD9]], ptr [[I]], align 4
@@ -1404,8 +1421,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END23:%.*]]
 // CHECK2:       for.body18:
 // CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP16]], ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr @_ZZ14split_mid_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL19:%.*]] = mul nsw i32 [[TMP17]], 1
 // CHECK2-NEXT:    [[ADD20:%.*]] = add nsw i32 0, [[MUL19]]
 // CHECK2-NEXT:    store i32 [[ADD20]], ptr [[I]], align 4
@@ -1428,6 +1445,7 @@ void split_range_for() {
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4
@@ -1448,8 +1466,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK2:       for.body:
 // CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP3]], ptr @_ZZ15split_neg_startE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL:%.*]] = mul i32 [[TMP4]], 1
 // CHECK2-NEXT:    [[ADD:%.*]] = add i32 -1, [[MUL]]
 // CHECK2-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -1473,8 +1491,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP6]], label [[FOR_BODY7:%.*]], label [[FOR_END12:%.*]]
 // CHECK2:       for.body7:
 // CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP9]], ptr @_ZZ15split_neg_startE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL8:%.*]] = mul i32 [[TMP10]], 1
 // CHECK2-NEXT:    [[ADD9:%.*]] = add i32 -1, [[MUL8]]
 // CHECK2-NEXT:    store i32 [[ADD9]], ptr [[I]], align 4
@@ -1500,8 +1518,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END23:%.*]]
 // CHECK2:       for.body18:
 // CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP16]], ptr @_ZZ15split_neg_startE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr @_ZZ15split_neg_startE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL19:%.*]] = mul i32 [[TMP17]], 1
 // CHECK2-NEXT:    [[ADD20:%.*]] = add i32 -1, [[MUL19]]
 // CHECK2-NEXT:    store i32 [[ADD20]], ptr [[I]], align 4
@@ -1524,6 +1542,7 @@ void split_range_for() {
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
@@ -1544,8 +1563,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK2:       for.body:
 // CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP4]], ptr @_ZZ15split_only_fillE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr @_ZZ15split_only_fillE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1
 // CHECK2-NEXT:    [[ADD3:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK2-NEXT:    store i32 [[ADD3]], ptr [[I]], align 4
@@ -1568,6 +1587,7 @@ void split_range_for() {
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
@@ -1587,8 +1607,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK2:       for.body:
 // CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP3]], ptr @_ZZ11split_step2E7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ11split_step2E7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL:%.*]] = mul i32 [[TMP4]], 2
 // CHECK2-NEXT:    [[ADD:%.*]] = add i32 0, [[MUL]]
 // CHECK2-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -1611,8 +1631,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]]
 // CHECK2:       for.body6:
 // CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP9]], ptr @_ZZ11split_step2E7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ11split_step2E7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL7:%.*]] = mul i32 [[TMP10]], 2
 // CHECK2-NEXT:    [[ADD8:%.*]] = add i32 0, [[MUL7]]
 // CHECK2-NEXT:    store i32 [[ADD8]], ptr [[I]], align 4
@@ -1635,6 +1655,7 @@ void split_range_for() {
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4
@@ -1655,8 +1676,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK2:       for.body:
 // CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP3]], ptr @_ZZ17split_three_constE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK2-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -1677,8 +1698,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP4]], label [[FOR_BODY5:%.*]], label [[FOR_END10:%.*]]
 // CHECK2:       for.body5:
 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP8]], ptr @_ZZ17split_three_constE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL6:%.*]] = mul nsw i32 [[TMP9]], 1
 // CHECK2-NEXT:    [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
 // CHECK2-NEXT:    store i32 [[ADD7]], ptr [[I]], align 4
@@ -1701,8 +1722,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END19:%.*]]
 // CHECK2:       for.body14:
 // CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP14]], ptr @_ZZ17split_three_constE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr @_ZZ17split_three_constE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL15:%.*]] = mul nsw i32 [[TMP15]], 1
 // CHECK2-NEXT:    [[ADD16:%.*]] = add nsw i32 0, [[MUL15]]
 // CHECK2-NEXT:    store i32 [[ADD16]], ptr [[I]], align 4
@@ -1722,6 +1743,7 @@ void split_range_for() {
 // CHECK2-SAME: ) #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
@@ -1733,8 +1755,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK2:       for.body:
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP1]], ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK2-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -1755,8 +1777,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END8:%.*]]
 // CHECK2:       for.body3:
 // CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP6]], ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr @_ZZ20split_two_const_tripE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL4:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK2-NEXT:    [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
 // CHECK2-NEXT:    store i32 [[ADD5]], ptr [[I]], align 4
@@ -1779,6 +1801,7 @@ void split_range_for() {
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
@@ -1798,8 +1821,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK2:       for.body:
 // CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP3]], ptr @_ZZ14split_var_tripE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr @_ZZ14split_var_tripE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK2-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -1822,8 +1845,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END11:%.*]]
 // CHECK2:       for.body6:
 // CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP9]], ptr @_ZZ14split_var_tripE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr @_ZZ14split_var_tripE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL7:%.*]] = mul nsw i32 [[TMP10]], 1
 // CHECK2-NEXT:    [[ADD8:%.*]] = add nsw i32 0, [[MUL7]]
 // CHECK2-NEXT:    store i32 [[ADD8]], ptr [[I]], align 4
@@ -1843,6 +1866,7 @@ void split_range_for() {
 // CHECK2-SAME: ) #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    store i32 0, ptr [[I]], align 4
@@ -1854,8 +1878,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 // CHECK2:       for.body:
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP1]], ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK2-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -1876,8 +1900,8 @@ void split_range_for() {
 // CHECK2-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END8:%.*]]
 // CHECK2:       for.body3:
 // CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// CHECK2-NEXT:    store i32 [[TMP6]], ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr @_ZZ16split_zero_firstE7.omp.iv, align 4
+// CHECK2-NEXT:    store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[MUL4:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK2-NEXT:    [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
 // CHECK2-NEXT:    store i32 [[ADD5]], ptr [[I]], align 4
diff --git a/clang/test/OpenMP/split_composition.cpp b/clang/test/OpenMP/split_composition.cpp
index b6c07e47466d9..eabe3f8f345f7 100644
--- a/clang/test/OpenMP/split_composition.cpp
+++ b/clang/test/OpenMP/split_composition.cpp
@@ -4,10 +4,10 @@
 
 extern "C" void body(int, int);
 
-// CHECK: define {{.*}} @_Z11compositionv
+// CHECK: define {{.*}} @composition(
 // CHECK: .split.iv
 // CHECK: call void @body
-void composition(void) {
+extern "C" void composition(void) {
 #pragma omp parallel for
   for (int i = 0; i < 4; ++i) {
 #pragma omp split counts(2, omp_fill)
diff --git a/clang/test/OpenMP/split_compound_associated.cpp b/clang/test/OpenMP/split_compound_associated.cpp
index 125a2d16d875a..7bbc5107ef672 100644
--- a/clang/test/OpenMP/split_compound_associated.cpp
+++ b/clang/test/OpenMP/split_compound_associated.cpp
@@ -2,9 +2,9 @@
 //
 // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s
 
-// CHECK-LABEL: define {{.*}} @_Z1fv
+// CHECK-LABEL: define {{.*}} @f(
 // CHECK: .split.iv
-void f(void) {
+extern "C" void f(void) {
 #pragma omp split counts(2, omp_fill)
   {
     for (int i = 0; i < 10; ++i) {
diff --git a/clang/test/OpenMP/split_counts_verify.c b/clang/test/OpenMP/split_counts_verify.c
index 0037dda44ba4a..7fec1561d8380 100644
--- a/clang/test/OpenMP/split_counts_verify.c
+++ b/clang/test/OpenMP/split_counts_verify.c
@@ -29,6 +29,7 @@ int main(void) {
 // IR-NEXT:    [[N:%.*]] = alloca i32, align 4
 // IR-NEXT:    [[SUM:%.*]] = alloca i32, align 4
 // IR-NEXT:    [[I:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // IR-NEXT:    [[DOTSPLIT_IV_0_I:%.*]] = alloca i32, align 4
 // IR-NEXT:    [[DOTSPLIT_IV_1_I:%.*]] = alloca i32, align 4
 // IR-NEXT:    [[DOTSPLIT_IV_2_I:%.*]] = alloca i32, align 4
@@ -44,8 +45,8 @@ int main(void) {
 // IR-NEXT:    br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
 // IR:       [[FOR_BODY]]:
 // IR-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTSPLIT_IV_0_I]], align 4
-// IR-NEXT:    store i32 [[TMP1]], ptr @main..omp.iv, align 4
-// IR-NEXT:    [[TMP2:%.*]] = load i32, ptr @main..omp.iv, align 4
+// IR-NEXT:    store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // IR-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
 // IR-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // IR-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
@@ -68,8 +69,8 @@ int main(void) {
 // IR-NEXT:    br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END10:.*]]
 // IR:       [[FOR_BODY4]]:
 // IR-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSPLIT_IV_1_I]], align 4
-// IR-NEXT:    store i32 [[TMP7]], ptr @main..omp.iv, align 4
-// IR-NEXT:    [[TMP8:%.*]] = load i32, ptr @main..omp.iv, align 4
+// IR-NEXT:    store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // IR-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[TMP8]], 1
 // IR-NEXT:    [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
 // IR-NEXT:    store i32 [[ADD6]], ptr [[I]], align 4
@@ -92,8 +93,8 @@ int main(void) {
 // IR-NEXT:    br i1 [[CMP12]], label %[[FOR_BODY13:.*]], label %[[FOR_END19:.*]]
 // IR:       [[FOR_BODY13]]:
 // IR-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTSPLIT_IV_2_I]], align 4
-// IR-NEXT:    store i32 [[TMP13]], ptr @main..omp.iv, align 4
-// IR-NEXT:    [[TMP14:%.*]] = load i32, ptr @main..omp.iv, align 4
+// IR-NEXT:    store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // IR-NEXT:    [[MUL14:%.*]] = mul nsw i32 [[TMP14]], 1
 // IR-NEXT:    [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
 // IR-NEXT:    store i32 [[ADD15]], ptr [[I]], align 4
diff --git a/clang/test/OpenMP/split_distribute_inner_split.cpp b/clang/test/OpenMP/split_distribute_inner_split.cpp
index 9134ec7d8e0de..290d0336f1b03 100644
--- a/clang/test/OpenMP/split_distribute_inner_split.cpp
+++ b/clang/test/OpenMP/split_distribute_inner_split.cpp
@@ -2,9 +2,9 @@
 //
 // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s
 
-// CHECK-LABEL: define {{.*}} @_Z1fv
+// CHECK-LABEL: define {{.*}} @f(
 // CHECK: .split.iv
-void f(void) {
+extern "C" void f(void) {
 #pragma omp distribute
   for (int i = 0; i < 10; ++i) {
 #pragma omp split counts(2, omp_fill)
diff --git a/clang/test/OpenMP/split_loop_styles.cpp b/clang/test/OpenMP/split_loop_styles.cpp
index ea0bf3d571e1b..0aa61b20a87bd 100644
--- a/clang/test/OpenMP/split_loop_styles.cpp
+++ b/clang/test/OpenMP/split_loop_styles.cpp
@@ -4,9 +4,9 @@
 
 extern "C" void body(int);
 
-// CHECK-LABEL: define {{.*}} @_Z8outer_iv
+// CHECK-LABEL: define {{.*}} @outer_iv(
 // CHECK: .split.iv
-void outer_iv(int n) {
+extern "C" void outer_iv(int n) {
   int i;
 #pragma omp split counts(3, omp_fill)
   for (i = 0; i < n; ++i)
diff --git a/clang/test/OpenMP/split_opts_simd_debug.cpp b/clang/test/OpenMP/split_opts_simd_debug.cpp
index d257aa0e2ef5e..d378707ee66c7 100644
--- a/clang/test/OpenMP/split_opts_simd_debug.cpp
+++ b/clang/test/OpenMP/split_opts_simd_debug.cpp
@@ -1,4 +1,4 @@
-// Optimized split IR at -O1; split + `-fopenmp-simd` syntax-only here (host emit-llvm can crash); -g smoke.
+// Optimized split IR at -O1; split + `-fopenmp-simd` syntax-only; -g debug-info smoke.
 //
 // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -O1 -emit-llvm -DTEST_BODY %s -o - | FileCheck %s --check-prefix=O1
 // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp-simd -fopenmp-version=60 -fsyntax-only -verify -DTEST_SIMD %s
diff --git a/clang/test/OpenMP/split_parallel_split.cpp b/clang/test/OpenMP/split_parallel_split.cpp
index dd520e46c50d2..bf30373f9bb8c 100644
--- a/clang/test/OpenMP/split_parallel_split.cpp
+++ b/clang/test/OpenMP/split_parallel_split.cpp
@@ -2,10 +2,10 @@
 //
 // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x c++ -fopenmp -fopenmp-version=60 -O0 -emit-llvm %s -o - | FileCheck %s
 
-// CHECK-LABEL: define {{.*}} @_Z1fv
+// CHECK-LABEL: define {{.*}} @f(
 // CHECK: __kmpc_fork_call
 // CHECK: .split.iv
-void f(void) {
+extern "C" void f(void) {
 #pragma omp parallel
   {
 #pragma omp split counts(2, omp_fill)
diff --git a/openmp/runtime/test/transform/split/fill_first.c b/openmp/runtime/test/transform/split/fill_first.c
index 7835d2caf58e8..12568f6896d18 100644
--- a/openmp/runtime/test/transform/split/fill_first.c
+++ b/openmp/runtime/test/transform/split/fill_first.c
@@ -1,8 +1,5 @@
 // RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
 
-#ifndef HEADER
-#define HEADER
-
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -15,8 +12,6 @@ int main() {
   return EXIT_SUCCESS;
 }
 
-#endif /* HEADER */
-
 // CHECK:      do
 // CHECK-NEXT: i=0
 // CHECK-NEXT: i=1
diff --git a/openmp/runtime/test/transform/split/foreach.cpp b/openmp/runtime/test/transform/split/foreach.cpp
index e8f3ddc9a56ed..846449b9d86ca 100644
--- a/openmp/runtime/test/transform/split/foreach.cpp
+++ b/openmp/runtime/test/transform/split/foreach.cpp
@@ -1,8 +1,5 @@
 // RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines
 
-#ifndef HEADER
-#define HEADER
-
 #include <cstdlib>
 #include <cstdio>
 #include <vector>
@@ -17,8 +14,6 @@ int main() {
   return EXIT_SUCCESS;
 }
 
-#endif /* HEADER */
-
 // CHECK:      do
 // CHECK-NEXT: x=10
 // CHECK-NEXT: x=20
diff --git a/openmp/runtime/test/transform/split/intfor.c b/openmp/runtime/test/transform/split/intfor.c
index 8c70afd6f2594..321fa5ca51f08 100644
--- a/openmp/runtime/test/transform/split/intfor.c
+++ b/openmp/runtime/test/transform/split/intfor.c
@@ -1,8 +1,5 @@
 // RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
 
-#ifndef HEADER
-#define HEADER
-
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -15,8 +12,6 @@ int main() {
   return EXIT_SUCCESS;
 }
 
-#endif /* HEADER */
-
 // CHECK:      do
 // CHECK-NEXT: i=0
 // CHECK-NEXT: i=1
diff --git a/openmp/runtime/test/transform/split/intfor_negstart.c b/openmp/runtime/test/transform/split/intfor_negstart.c
index 978c0a823d843..1e3860bba2d53 100644
--- a/openmp/runtime/test/transform/split/intfor_negstart.c
+++ b/openmp/runtime/test/transform/split/intfor_negstart.c
@@ -1,8 +1,5 @@
 // RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
 
-#ifndef HEADER
-#define HEADER
-
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -16,8 +13,6 @@ int main() {
   return EXIT_SUCCESS;
 }
 
-#endif /* HEADER */
-
 // CHECK:      do
 // CHECK-NEXT: i=-1
 // CHECK-NEXT: i=0
diff --git a/openmp/runtime/test/transform/split/iterfor.cpp b/openmp/runtime/test/transform/split/iterfor.cpp
index 86be9ecbdfee3..60ecbb374f6e3 100644
--- a/openmp/runtime/test/transform/split/iterfor.cpp
+++ b/openmp/runtime/test/transform/split/iterfor.cpp
@@ -1,25 +1,139 @@
-// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines
-
-#ifndef HEADER
-#define HEADER
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
 
 #include <cstdlib>
+#include <cstdarg>
 #include <cstdio>
 
+struct Reporter { // Test range type: every operation is logged via print().
+  const char *name; // Label printed in brackets before each message.
+  // Named constructor; logs "ctor".
+  Reporter(const char *name) : name(name) { print("ctor"); }
+  // Default constructor; uses the label "<anon>" and logs "ctor".
+  Reporter() : name("<anon>") { print("ctor"); }
+  // Copy constructor; takes that.name and logs "copy ctor".
+  Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+  // Move constructor; takes that.name and logs "move ctor".
+  Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+  // Destructor; logs "dtor".
+  ~Reporter() { print("dtor"); }
+  // Copy assignment; logs under the current label, then takes that.name.
+  const Reporter &operator=(const Reporter &that) {
+    print("copy assign");
+    this->name = that.name;
+    return *this;
+  }
+  // Move assignment; logs under the current label, then takes that.name.
+  const Reporter &operator=(Reporter &&that) {
+    print("move assign");
+    this->name = that.name;
+    return *this;
+  }
+  // Iterator over integer positions; logs its operations through owner.
+  struct Iterator {
+    const Reporter *owner; // Reporter whose print() emits the log lines.
+    int pos;               // Current position; the value operator* returns.
+    // Plain constructor; unlike the copy/move constructors it does not log.
+    Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+    // Copy constructor; logs "iterator copy ctor".
+    Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator copy ctor");
+    }
+    // Move constructor; copies both fields and logs "iterator move ctor".
+    Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator move ctor");
+    }
+    // Destructor; logs "iterator dtor".
+    ~Iterator() { owner->print("iterator dtor"); }
+    // Copy assignment; logs via the current owner before overwriting fields.
+    const Iterator &operator=(const Iterator &that) {
+      owner->print("iterator copy assign");
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+    // Move assignment; logs via the current owner before overwriting fields.
+    const Iterator &operator=(Iterator &&that) {
+      owner->print("iterator move assign");
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+    // Equality; logs both positions and compares pos only (owner is ignored).
+    bool operator==(const Iterator &that) const {
+      owner->print("iterator %d == %d", this->pos, that.pos);
+      return this->pos == that.pos;
+    }
+    // Inequality; logs both positions and compares pos only.
+    bool operator!=(const Iterator &that) const {
+      owner->print("iterator %d != %d", this->pos, that.pos);
+      return this->pos != that.pos;
+    }
+    // Pre-increment; logs, advances pos by one, and returns *this.
+    Iterator &operator++() {
+      owner->print("iterator prefix ++");
+      pos += 1;
+      return *this;
+    }
+    // Post-increment; logs, saves a copy, advances pos, returns the copy.
+    Iterator operator++(int) {
+      owner->print("iterator postfix ++");
+      auto result = *this; // Made with the logging copy constructor.
+      pos += 1;
+      return result;
+    }
+    // Dereference; logs and returns the current position as the element.
+    int operator*() const {
+      owner->print("iterator deref: %d", pos);
+      return pos;
+    }
+    // Distance; computed and logged as an int, then returned as size_t.
+    size_t operator-(const Iterator &that) const {
+      int result = this->pos - that.pos;
+      owner->print("iterator distance: %d", result);
+      return result;
+    }
+    // Advance; logs and returns a new iterator at pos + steps (*this is kept).
+    Iterator operator+(int steps) const {
+      owner->print("iterator advance: %d += %d", this->pos, steps);
+      return Iterator(owner, pos + steps);
+    }
+  };
+  // Logs "begin()" and returns an iterator at position 0.
+  Iterator begin() const {
+    print("begin()");
+    return Iterator(this, 0);
+  }
+  // Logs "end()" and returns an iterator at position 4 (range is 0..3).
+  Iterator end() const {
+    print("end()");
+    return Iterator(this, 4);
+  }
+  // printf-style logger; writes "[name] ", the formatted message, and "\n".
+  void print(const char *msg, ...) const {
+    va_list args;
+    va_start(args, msg);
+    printf("[%s] ", name);
+    vprintf(msg, args);
+    printf("\n");
+    va_end(args);
+  }
+};
+
 int main() {
   printf("do\n");
+  Reporter range("range");
 #pragma omp split counts(1, omp_fill, 1)
-  for (int i = 7; i < 19; i += 3)
-    printf("i=%d\n", i);
+  for (auto it = range.begin(); it != range.end(); ++it)
+    printf("v=%d\n", *it);
   printf("done\n");
   return EXIT_SUCCESS;
 }
 
-#endif /* HEADER */
-
-// CHECK:      do
-// CHECK-NEXT: i=7
-// CHECK-NEXT: i=10
-// CHECK-NEXT: i=13
-// CHECK-NEXT: i=16
-// CHECK-NEXT: done
+// CHECK: do
+// CHECK: [range] ctor
+// CHECK: v=0
+// CHECK: v=1
+// CHECK: v=2
+// CHECK: v=3
+// CHECK: done
+// CHECK: [range] dtor
diff --git a/openmp/runtime/test/transform/split/leq_bound.c b/openmp/runtime/test/transform/split/leq_bound.c
index 8ec7041aea42c..81061fe430ae4 100644
--- a/openmp/runtime/test/transform/split/leq_bound.c
+++ b/openmp/runtime/test/transform/split/leq_bound.c
@@ -1,8 +1,5 @@
 // RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
 
-#ifndef HEADER
-#define HEADER
-
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -16,8 +13,6 @@ int main() {
   return EXIT_SUCCESS;
 }
 
-#endif /* HEADER */
-
 // CHECK:      do
 // CHECK-NEXT: i=0
 // CHECK-NEXT: i=1
diff --git a/openmp/runtime/test/transform/split/negative_incr.c b/openmp/runtime/test/transform/split/negative_incr.c
new file mode 100644
index 0000000000000..ce537db067f06
--- /dev/null
+++ b/openmp/runtime/test/transform/split/negative_incr.c
@@ -0,0 +1,22 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() { // Applies `omp split` to a loop with a negative increment.
+  printf("do\n");
+#pragma omp split counts(1, omp_fill, 1)
+  for (int i = 5; i >= 0; --i) // Counts down: i takes 5, 4, 3, 2, 1, 0.
+    printf("i=%d\n", i);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+// CHECK:      do
+// CHECK-NEXT: i=5
+// CHECK-NEXT: i=4
+// CHECK-NEXT: i=3
+// CHECK-NEXT: i=2
+// CHECK-NEXT: i=1
+// CHECK-NEXT: i=0
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/split/nonconstant_count.c b/openmp/runtime/test/transform/split/nonconstant_count.c
new file mode 100644
index 0000000000000..c5d97de27e123
--- /dev/null
+++ b/openmp/runtime/test/transform/split/nonconstant_count.c
@@ -0,0 +1,27 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() { // Applies `omp split` with a count taken from a variable.
+  int v = 3; // Non-constant value passed as the first counts() argument.
+  printf("do\n");
+#pragma omp split counts(v, omp_fill)
+  for (int i = 0; i < 10; ++i) // Ten iterations: i takes 0 through 9.
+    printf("i=%d\n", i);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+// CHECK:      do
+// CHECK-NEXT: i=0
+// CHECK-NEXT: i=1
+// CHECK-NEXT: i=2
+// CHECK-NEXT: i=3
+// CHECK-NEXT: i=4
+// CHECK-NEXT: i=5
+// CHECK-NEXT: i=6
+// CHECK-NEXT: i=7
+// CHECK-NEXT: i=8
+// CHECK-NEXT: i=9
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/split/nonconstant_incr.c b/openmp/runtime/test/transform/split/nonconstant_incr.c
new file mode 100644
index 0000000000000..d594a437f2ab2
--- /dev/null
+++ b/openmp/runtime/test/transform/split/nonconstant_incr.c
@@ -0,0 +1,22 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() { // Applies `omp split` to a loop with variable bound and step.
+  int n = 19; // Upper bound of the loop, held in a variable.
+  int c = 3;  // Loop increment, held in a variable.
+  printf("do\n");
+#pragma omp split counts(1, omp_fill, 1)
+  for (int i = 7; i < n; i += c) // Visits i = 7, 10, 13, 16.
+    printf("i=%d\n", i);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+// CHECK:      do
+// CHECK-NEXT: i=7
+// CHECK-NEXT: i=10
+// CHECK-NEXT: i=13
+// CHECK-NEXT: i=16
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/split/parallel-split-intfor.c b/openmp/runtime/test/transform/split/parallel-split-intfor.c
index ba0524b6ad77e..0b9bd7df5027e 100644
--- a/openmp/runtime/test/transform/split/parallel-split-intfor.c
+++ b/openmp/runtime/test/transform/split/parallel-split-intfor.c
@@ -1,8 +1,5 @@
 // RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
 
-#ifndef HEADER
-#define HEADER
-
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -18,8 +15,6 @@ int main() {
   return EXIT_SUCCESS;
 }
 
-#endif /* HEADER */
-
 // CHECK:      do
 // CHECK-NEXT: i=0
 // CHECK-NEXT: i=1
diff --git a/openmp/runtime/test/transform/split/single_fill.c b/openmp/runtime/test/transform/split/single_fill.c
index 4b10ee7bcb566..4ef10bf3b4d26 100644
--- a/openmp/runtime/test/transform/split/single_fill.c
+++ b/openmp/runtime/test/transform/split/single_fill.c
@@ -1,8 +1,5 @@
 // RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
 
-#ifndef HEADER
-#define HEADER
-
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -16,8 +13,6 @@ int main() {
   return EXIT_SUCCESS;
 }
 
-#endif /* HEADER */
-
 // CHECK:      do
 // CHECK-NEXT: i=0
 // CHECK-NEXT: i=1
diff --git a/openmp/runtime/test/transform/split/three_segments.c b/openmp/runtime/test/transform/split/three_segments.c
index e06cdafc1d12a..f34b640a86710 100644
--- a/openmp/runtime/test/transform/split/three_segments.c
+++ b/openmp/runtime/test/transform/split/three_segments.c
@@ -1,8 +1,5 @@
 // RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
 
-#ifndef HEADER
-#define HEADER
-
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -15,8 +12,6 @@ int main() {
   return EXIT_SUCCESS;
 }
 
-#endif /* HEADER */
-
 // CHECK:      do
 // CHECK-NEXT: i=0
 // CHECK-NEXT: i=1
diff --git a/openmp/runtime/test/transform/split/trip_one.c b/openmp/runtime/test/transform/split/trip_one.c
index 5e520db697bc0..5f00d25239685 100644
--- a/openmp/runtime/test/transform/split/trip_one.c
+++ b/openmp/runtime/test/transform/split/trip_one.c
@@ -2,9 +2,6 @@
 
 // Tiny trip counts: trip=1 with counts(1, omp_fill) and trip=0.
 
-#ifndef HEADER
-#define HEADER
-
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -28,8 +25,6 @@ int main() {
   return EXIT_SUCCESS;
 }
 
-#endif /* HEADER */
-
 // CHECK:      trip1
 // CHECK-NEXT: i=0
 // CHECK-NEXT: end1
diff --git a/openmp/runtime/test/transform/split/unsigned_iv.c b/openmp/runtime/test/transform/split/unsigned_iv.c
index 7d2ef434edfec..ad096122d8cea 100644
--- a/openmp/runtime/test/transform/split/unsigned_iv.c
+++ b/openmp/runtime/test/transform/split/unsigned_iv.c
@@ -1,8 +1,5 @@
 // RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
 
-#ifndef HEADER
-#define HEADER
-
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -15,8 +12,6 @@ int main() {
   return EXIT_SUCCESS;
 }
 
-#endif /* HEADER */
-
 // CHECK:      do
 // CHECK-NEXT: i=0
 // CHECK-NEXT: i=1
diff --git a/openmp/runtime/test/transform/split/zero_first_segment.c b/openmp/runtime/test/transform/split/zero_first_segment.c
index bf657fa2e276c..09cc526ed81ec 100644
--- a/openmp/runtime/test/transform/split/zero_first_segment.c
+++ b/openmp/runtime/test/transform/split/zero_first_segment.c
@@ -1,8 +1,5 @@
 // RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
 
-#ifndef HEADER
-#define HEADER
-
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -15,8 +12,6 @@ int main() {
   return EXIT_SUCCESS;
 }
 
-#endif /* HEADER */
-
 // CHECK:      do
 // CHECK-NEXT: i=0
 // CHECK-NEXT: i=1



More information about the llvm-commits mailing list