[clang] [flang] [llvm] [openmp] [Clang][OpenMP][LoopTransformations] Add support for "#pragma omp fuse" loop transformation directive and "looprange" clause (PR #139293)
Walter J.T.V via cfe-commits
cfe-commits at lists.llvm.org
Fri May 23 10:48:05 PDT 2025
https://github.com/eZWALT updated https://github.com/llvm/llvm-project/pull/139293
>From 204d902b738dcd9d260963afab3d4f8f5f1c0066 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:25:33 +0000
Subject: [PATCH 1/9] Add fuse directive patch
---
clang/include/clang-c/Index.h | 4 +
clang/include/clang/AST/RecursiveASTVisitor.h | 3 +
clang/include/clang/AST/StmtOpenMP.h | 105 +-
.../clang/Basic/DiagnosticSemaKinds.td | 8 +
clang/include/clang/Basic/StmtNodes.td | 1 +
clang/include/clang/Sema/SemaOpenMP.h | 27 +
.../include/clang/Serialization/ASTBitCodes.h | 1 +
clang/lib/AST/StmtOpenMP.cpp | 25 +
clang/lib/AST/StmtPrinter.cpp | 5 +
clang/lib/AST/StmtProfile.cpp | 4 +
clang/lib/Basic/OpenMPKinds.cpp | 2 +-
clang/lib/CodeGen/CGStmt.cpp | 3 +
clang/lib/CodeGen/CGStmtOpenMP.cpp | 8 +
clang/lib/CodeGen/CodeGenFunction.h | 1 +
clang/lib/Sema/SemaExceptionSpec.cpp | 1 +
clang/lib/Sema/SemaOpenMP.cpp | 600 +++++++
clang/lib/Sema/TreeTransform.h | 11 +
clang/lib/Serialization/ASTReaderStmt.cpp | 11 +
clang/lib/Serialization/ASTWriterStmt.cpp | 6 +
clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 1 +
clang/test/OpenMP/fuse_ast_print.cpp | 278 +++
clang/test/OpenMP/fuse_codegen.cpp | 1511 +++++++++++++++++
clang/test/OpenMP/fuse_messages.cpp | 76 +
clang/tools/libclang/CIndex.cpp | 7 +
clang/tools/libclang/CXCursor.cpp | 3 +
llvm/include/llvm/Frontend/OpenMP/OMP.td | 4 +
.../runtime/test/transform/fuse/foreach.cpp | 192 +++
openmp/runtime/test/transform/fuse/intfor.c | 50 +
.../runtime/test/transform/fuse/iterfor.cpp | 194 +++
.../fuse/parallel-wsloop-collapse-foreach.cpp | 208 +++
.../fuse/parallel-wsloop-collapse-intfor.c | 45 +
31 files changed, 3391 insertions(+), 4 deletions(-)
create mode 100644 clang/test/OpenMP/fuse_ast_print.cpp
create mode 100644 clang/test/OpenMP/fuse_codegen.cpp
create mode 100644 clang/test/OpenMP/fuse_messages.cpp
create mode 100644 openmp/runtime/test/transform/fuse/foreach.cpp
create mode 100644 openmp/runtime/test/transform/fuse/intfor.c
create mode 100644 openmp/runtime/test/transform/fuse/iterfor.cpp
create mode 100644 openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp
create mode 100644 openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c
diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
index d30d15e53802a..00046de62a742 100644
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -2162,6 +2162,10 @@ enum CXCursorKind {
*/
CXCursor_OMPStripeDirective = 310,
+ /** OpenMP fuse directive
+ */
+ CXCursor_OMPFuseDirective = 318,
+
/** OpenACC Compute Construct.
*/
CXCursor_OpenACCComputeConstruct = 320,
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 23a8c4f1f7380..057e9e346ce4e 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3080,6 +3080,9 @@ DEF_TRAVERSE_STMT(OMPUnrollDirective,
DEF_TRAVERSE_STMT(OMPReverseDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
+DEF_TRAVERSE_STMT(OMPFuseDirective,
+ { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
DEF_TRAVERSE_STMT(OMPInterchangeDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 736bcabbad1f7..dc6f797e24ab8 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -962,6 +962,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
/// Number of loops generated by this loop transformation.
unsigned NumGeneratedLoops = 0;
+ /// Number of top level canonical loop nests generated by this loop
+ /// transformation
+ unsigned NumGeneratedLoopNests = 0;
protected:
explicit OMPLoopTransformationDirective(StmtClass SC,
@@ -973,6 +976,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
/// Set the number of loops generated by this loop transformation.
void setNumGeneratedLoops(unsigned Num) { NumGeneratedLoops = Num; }
+ /// Set the number of top level canonical loop nests generated by this loop
+ /// transformation
+ void setNumGeneratedLoopNests(unsigned Num) { NumGeneratedLoopNests = Num; }
public:
/// Return the number of associated (consumed) loops.
@@ -981,6 +987,10 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
/// Return the number of loops generated by this loop transformation.
unsigned getNumGeneratedLoops() const { return NumGeneratedLoops; }
+ /// Return the number of top level canonical loop nests generated by this loop
+ /// transformation
+ unsigned getNumGeneratedLoopNests() const { return NumGeneratedLoopNests; }
+
/// Get the de-sugared statements after the loop transformation.
///
/// Might be nullptr if either the directive generates no loops and is handled
@@ -995,7 +1005,8 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
Stmt::StmtClass C = T->getStmtClass();
return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass ||
C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass ||
- C == OMPStripeDirectiveClass;
+ C == OMPStripeDirectiveClass ||
+ C == OMPFuseDirectiveClass;
}
};
@@ -5562,6 +5573,7 @@ class OMPTileDirective final : public OMPLoopTransformationDirective {
llvm::omp::OMPD_tile, StartLoc, EndLoc,
NumLoops) {
setNumGeneratedLoops(2 * NumLoops);
+ setNumGeneratedLoopNests(1);
}
void setPreInits(Stmt *PreInits) {
@@ -5790,7 +5802,11 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective {
explicit OMPReverseDirective(SourceLocation StartLoc, SourceLocation EndLoc)
: OMPLoopTransformationDirective(OMPReverseDirectiveClass,
llvm::omp::OMPD_reverse, StartLoc,
- EndLoc, 1) {}
+ EndLoc, 1) {
+
+ setNumGeneratedLoopNests(1);
+ setNumGeneratedLoops(1);
+ }
void setPreInits(Stmt *PreInits) {
Data->getChildren()[PreInitsOffset] = PreInits;
@@ -5857,7 +5873,8 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
: OMPLoopTransformationDirective(OMPInterchangeDirectiveClass,
llvm::omp::OMPD_interchange, StartLoc,
EndLoc, NumLoops) {
- setNumGeneratedLoops(3 * NumLoops);
+ setNumGeneratedLoops(NumLoops);
+ setNumGeneratedLoopNests(1);
}
void setPreInits(Stmt *PreInits) {
@@ -5908,6 +5925,88 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
}
};
+/// Represents the '#pragma omp fuse' loop transformation directive
+///
+/// \code{c}
+/// #pragma omp fuse
+/// {
+/// for(int i = 0; i < m1; ++i) {...}
+/// for(int j = 0; j < m2; ++j) {...}
+/// ...
+/// }
+/// \endcode
+
+class OMPFuseDirective final : public OMPLoopTransformationDirective {
+ friend class ASTStmtReader;
+ friend class OMPExecutableDirective;
+
+ // Offsets of child members.
+ enum {
+ PreInitsOffset = 0,
+ TransformedStmtOffset,
+ };
+
+ explicit OMPFuseDirective(SourceLocation StartLoc, SourceLocation EndLoc,
+ unsigned NumLoops)
+ : OMPLoopTransformationDirective(OMPFuseDirectiveClass,
+ llvm::omp::OMPD_fuse, StartLoc, EndLoc,
+ NumLoops) {
+ setNumGeneratedLoops(1);
+ // TODO: After implementing the looprange clause, change this logic
+ setNumGeneratedLoopNests(1);
+ }
+
+ void setPreInits(Stmt *PreInits) {
+ Data->getChildren()[PreInitsOffset] = PreInits;
+ }
+
+ void setTransformedStmt(Stmt *S) {
+ Data->getChildren()[TransformedStmtOffset] = S;
+ }
+
+public:
+ /// Create a new AST node representation for '#pragma omp fuse'
+ ///
+ /// \param C Context of the AST
+ /// \param StartLoc Location of the introducer (e.g. the 'omp' token)
+ /// \param EndLoc Location of the directive's end (e.g. the tok::eod)
+ /// \param Clauses The directive's clauses
+ /// \param NumLoops Number of total affected loops
+ /// \param NumLoopNests Number of affected top level canonical loops
+ /// (number of items in the 'looprange' clause if present)
+ /// \param AssociatedStmt The outermost associated loop
+ /// \param TransformedStmt The loop nest after fusion, or nullptr in
+ /// dependent contexts
+ /// \param PreInits Helper preinits statements for the loop nest
+ static OMPFuseDirective *Create(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ ArrayRef<OMPClause *> Clauses,
+ unsigned NumLoops, unsigned NumLoopNests,
+ Stmt *AssociatedStmt, Stmt *TransformedStmt,
+ Stmt *PreInits);
+
+ /// Build an empty '#pragma omp fuse' AST node for deserialization
+ ///
+ /// \param C Context of the AST
+ /// \param NumClauses Number of clauses to allocate
+ /// \param NumLoops Number of associated loops to allocate
+ static OMPFuseDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses,
+ unsigned NumLoops);
+
+ /// Gets the associated loops after the transformation. This is the de-sugared
+ /// replacement or nullptr in dependent contexts.
+ Stmt *getTransformedStmt() const {
+ return Data->getChildren()[TransformedStmtOffset];
+ }
+
+ /// Return preinits statement.
+ Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; }
+
+ static bool classof(const Stmt *T) {
+ return T->getStmtClass() == OMPFuseDirectiveClass;
+ }
+};
+
/// This represents '#pragma omp scan' directive.
///
/// \code
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 78b36ceb88125..f31b6f8a3b26a 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11558,6 +11558,14 @@ def note_omp_implicit_dsa : Note<
"implicitly determined as %0">;
def err_omp_loop_var_dsa : Error<
"loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">;
+def warn_omp_different_loop_ind_var_types : Warning <
+ "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">;
+def err_omp_not_canonical_loop : Error <
+ "loop after '#pragma omp %0' is not in canonical form">;
+def err_omp_not_a_loop_sequence : Error <
+ "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">;
+def err_omp_empty_loop_sequence : Error <
+ "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">;
def err_omp_not_for : Error<
"%select{statement after '#pragma omp %1' must be a for loop|"
"expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">;
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index 9526fa5808aa5..739160342062c 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -234,6 +234,7 @@ def OMPStripeDirective : StmtNode<OMPLoopTransformationDirective>;
def OMPUnrollDirective : StmtNode<OMPLoopTransformationDirective>;
def OMPReverseDirective : StmtNode<OMPLoopTransformationDirective>;
def OMPInterchangeDirective : StmtNode<OMPLoopTransformationDirective>;
+def OMPFuseDirective : StmtNode<OMPLoopTransformationDirective>;
def OMPForDirective : StmtNode<OMPLoopDirective>;
def OMPForSimdDirective : StmtNode<OMPLoopDirective>;
def OMPSectionsDirective : StmtNode<OMPExecutableDirective>;
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index 6498390fe96f7..8d78c2197c89d 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -457,6 +457,13 @@ class SemaOpenMP : public SemaBase {
Stmt *AStmt,
SourceLocation StartLoc,
SourceLocation EndLoc);
+
+ /// Called on well-formed '#pragma omp fuse' after parsing of its
+ /// clauses and the associated statement.
+ StmtResult ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
+ Stmt *AStmt, SourceLocation StartLoc,
+ SourceLocation EndLoc);
+
/// Called on well-formed '\#pragma omp for' after parsing
/// of the associated statement.
StmtResult
@@ -1480,6 +1487,26 @@ class SemaOpenMP : public SemaBase {
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits);
+ /// Analyzes and checks a loop sequence for use by a loop transformation
+ ///
+ /// \param Kind The loop transformation directive kind.
+ /// \param AStmt The associated statement; must be a compound statement.
+ /// \param LoopSeqSize [out] Number of top level canonical loop nests.
+ /// \param NumLoops [out] Total number of canonical loops.
+ /// \param LoopHelpers [out] The loop analysis results, one per loop nest.
+ /// \param ForStmts [out] The loop statement of each top level loop nest.
+ /// \param OriginalInits [out] Per loop nest, the statements/declarations
+ /// that must be executed before entering the loop.
+ /// \param Context The AST context, used to compare induction variable types.
+ /// \return True if no errors were diagnosed, false otherwise.
+ bool checkTransformableLoopSequence(
+ OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize,
+ unsigned &NumLoops,
+ SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+ SmallVectorImpl<Stmt *> &ForStmts,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
+ ASTContext &Context);
+
/// Helper to keep information about the current `omp begin/end declare
/// variant` nesting.
struct OMPDeclareVariantScope {
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 5cb9998126a85..8fe9d8248d66f 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1948,6 +1948,7 @@ enum StmtCode {
STMT_OMP_UNROLL_DIRECTIVE,
STMT_OMP_REVERSE_DIRECTIVE,
STMT_OMP_INTERCHANGE_DIRECTIVE,
+ STMT_OMP_FUSE_DIRECTIVE,
STMT_OMP_FOR_DIRECTIVE,
STMT_OMP_FOR_SIMD_DIRECTIVE,
STMT_OMP_SECTIONS_DIRECTIVE,
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 093e1f659916f..4a6133766ef1c 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -456,6 +456,8 @@ OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc,
auto *Dir = createDirective<OMPUnrollDirective>(
C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc);
Dir->setNumGeneratedLoops(NumGeneratedLoops);
+ // The number of generated loops and loop nests during unroll matches
+ Dir->setNumGeneratedLoopNests(NumGeneratedLoops);
Dir->setTransformedStmt(TransformedStmt);
Dir->setPreInits(PreInits);
return Dir;
@@ -505,6 +507,29 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
SourceLocation(), SourceLocation(), NumLoops);
}
+OMPFuseDirective *OMPFuseDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ ArrayRef<OMPClause *> Clauses, unsigned NumLoops, unsigned NumLoopNests,
+ Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits) {
+
+ OMPFuseDirective *Dir = createDirective<OMPFuseDirective>(
+ C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc,
+ NumLoops);
+ Dir->setTransformedStmt(TransformedStmt);
+ Dir->setPreInits(PreInits);
+ Dir->setNumGeneratedLoopNests(NumLoopNests);
+ Dir->setNumGeneratedLoops(NumLoops);
+ return Dir;
+}
+
+OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C,
+ unsigned NumClauses,
+ unsigned NumLoops) {
+ return createEmptyDirective<OMPFuseDirective>(
+ C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1,
+ SourceLocation(), SourceLocation(), NumLoops);
+}
+
OMPForSimdDirective *
OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
SourceLocation EndLoc, unsigned CollapsedNum,
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index dc8af1586624b..12a1d5a943704 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -791,6 +791,11 @@ void StmtPrinter::VisitOMPInterchangeDirective(OMPInterchangeDirective *Node) {
PrintOMPExecutableDirective(Node);
}
+void StmtPrinter::VisitOMPFuseDirective(OMPFuseDirective *Node) {
+ Indent() << "#pragma omp fuse";
+ PrintOMPExecutableDirective(Node);
+}
+
void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) {
Indent() << "#pragma omp for";
PrintOMPExecutableDirective(Node);
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index f7d1655f67ed1..99d426db985e8 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -1026,6 +1026,10 @@ void StmtProfiler::VisitOMPInterchangeDirective(
VisitOMPLoopTransformationDirective(S);
}
+void StmtProfiler::VisitOMPFuseDirective(const OMPFuseDirective *S) {
+ VisitOMPLoopTransformationDirective(S);
+}
+
void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) {
VisitOMPLoopDirective(S);
}
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index a451fc7c01841..d172450512f13 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -702,7 +702,7 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) {
return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse ||
- DKind == OMPD_interchange || DKind == OMPD_stripe;
+ DKind == OMPD_interchange || DKind == OMPD_stripe || DKind == OMPD_fuse;
}
bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) {
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 3562b4ea22a24..4a2dc1a537d46 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -233,6 +233,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
case Stmt::OMPInterchangeDirectiveClass:
EmitOMPInterchangeDirective(cast<OMPInterchangeDirective>(*S));
break;
+ case Stmt::OMPFuseDirectiveClass:
+ EmitOMPFuseDirective(cast<OMPFuseDirective>(*S));
+ break;
case Stmt::OMPForDirectiveClass:
EmitOMPForDirective(cast<OMPForDirective>(*S));
break;
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 803c7ed37635e..0c664b0f89044 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -197,6 +197,8 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
} else if (const auto *Interchange =
dyn_cast<OMPInterchangeDirective>(&S)) {
PreInits = Interchange->getPreInits();
+ } else if (const auto *Fuse = dyn_cast<OMPFuseDirective>(&S)) {
+ PreInits = Fuse->getPreInits();
} else {
llvm_unreachable("Unknown loop-based directive kind.");
}
@@ -2918,6 +2920,12 @@ void CodeGenFunction::EmitOMPInterchangeDirective(
EmitStmt(S.getTransformedStmt());
}
+void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) {
+ // Emit the de-sugared statement
+ OMPTransformDirectiveScopeRAII FuseScope(*this, &S);
+ EmitStmt(S.getTransformedStmt());
+}
+
void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 78d71fc822bcb..a983901f560de 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3906,6 +3906,7 @@ class CodeGenFunction : public CodeGenTypeCache {
void EmitOMPUnrollDirective(const OMPUnrollDirective &S);
void EmitOMPReverseDirective(const OMPReverseDirective &S);
void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S);
+ void EmitOMPFuseDirective(const OMPFuseDirective &S);
void EmitOMPForDirective(const OMPForDirective &S);
void EmitOMPForSimdDirective(const OMPForSimdDirective &S);
void EmitOMPScopeDirective(const OMPScopeDirective &S);
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index c83eab53891ca..85a374e6eb9b2 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1491,6 +1491,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
case Stmt::OMPUnrollDirectiveClass:
case Stmt::OMPReverseDirectiveClass:
case Stmt::OMPInterchangeDirectiveClass:
+ case Stmt::OMPFuseDirectiveClass:
case Stmt::OMPSingleDirectiveClass:
case Stmt::OMPTargetDataDirectiveClass:
case Stmt::OMPTargetDirectiveClass:
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index f16f841d62edd..bd8bee64a9d2f 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4404,6 +4404,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind,
case OMPD_unroll:
case OMPD_reverse:
case OMPD_interchange:
+ case OMPD_fuse:
case OMPD_assume:
break;
default:
@@ -6221,6 +6222,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc,
EndLoc);
break;
+ case OMPD_fuse:
+ Res =
+ ActOnOpenMPFuseDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc);
+ break;
case OMPD_for:
Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc,
VarsWithInheritedDSA);
@@ -14193,6 +14198,8 @@ bool SemaOpenMP::checkTransformableLoopNest(
DependentPreInits = Dir->getPreInits();
else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform))
DependentPreInits = Dir->getPreInits();
+ else if (auto *Dir = dyn_cast<OMPFuseDirective>(Transform))
+ DependentPreInits = Dir->getPreInits();
else
llvm_unreachable("Unhandled loop transformation");
@@ -14203,6 +14210,265 @@ bool SemaOpenMP::checkTransformableLoopNest(
return Result;
}
+class NestedLoopCounterVisitor
+ : public clang::RecursiveASTVisitor<NestedLoopCounterVisitor> {
+public:
+ explicit NestedLoopCounterVisitor() : NestedLoopCount(0) {}
+
+ bool VisitForStmt(clang::ForStmt *FS) {
+ ++NestedLoopCount;
+ return true;
+ }
+
+ bool VisitCXXForRangeStmt(clang::CXXForRangeStmt *FRS) {
+ ++NestedLoopCount;
+ return true;
+ }
+
+ unsigned getNestedLoopCount() const { return NestedLoopCount; }
+
+private:
+ unsigned NestedLoopCount;
+};
+
+bool SemaOpenMP::checkTransformableLoopSequence(
+ OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize,
+ unsigned &NumLoops,
+ SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+ SmallVectorImpl<Stmt *> &ForStmts,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
+ ASTContext &Context) {
+
+ // Checks whether the given statement is a compound statement
+ VarsWithInheritedDSAType TmpDSA;
+ if (!isa<CompoundStmt>(AStmt)) {
+ Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
+ << getOpenMPDirectiveName(Kind);
+ return false;
+ }
+ // Callback for updating pre-inits in case there are even more
+ // loop-sequence-generating-constructs inside of the main compound stmt
+ auto OnTransformationCallback =
+ [&OriginalInits](OMPLoopBasedDirective *Transform) {
+ Stmt *DependentPreInits;
+ if (auto *Dir = dyn_cast<OMPTileDirective>(Transform))
+ DependentPreInits = Dir->getPreInits();
+ else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform))
+ DependentPreInits = Dir->getPreInits();
+ else if (auto *Dir = dyn_cast<OMPReverseDirective>(Transform))
+ DependentPreInits = Dir->getPreInits();
+ else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform))
+ DependentPreInits = Dir->getPreInits();
+ else if (auto *Dir = dyn_cast<OMPFuseDirective>(Transform))
+ DependentPreInits = Dir->getPreInits();
+ else
+ llvm_unreachable("Unhandled loop transformation");
+
+ appendFlattenedStmtList(OriginalInits.back(), DependentPreInits);
+ };
+
+ // Number of top level canonical loop nests observed (also acts as index)
+ LoopSeqSize = 0;
+ // Number of total observed loops
+ NumLoops = 0;
+
+ // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows
+ // the grammar:
+ //
+ // canonical-loop-sequence:
+ // {
+ // loop-sequence+
+ // }
+ // where loop-sequence can be any of the following:
+ // 1. canonical-loop-sequence
+ // 2. loop-nest
+ // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective)
+ //
+ // To recognise and traverse this structure the following helper functions
+ // have been defined. handleLoopSequence serves as the recursive entry point
+ // and tries to match the input AST to the canonical loop sequence grammar
+ // structure
+
+ auto NLCV = NestedLoopCounterVisitor();
+ // Helper functions to check that the canonical loop sequence grammar holds
+ auto isLoopSequenceDerivation = [](auto *Child) {
+ return isa<ForStmt>(Child) || isa<CXXForRangeStmt>(Child) ||
+ isa<OMPLoopTransformationDirective>(Child);
+ };
+ auto isLoopGeneratingStmt = [](auto *Child) {
+ return isa<OMPLoopTransformationDirective>(Child);
+ };
+
+ // Helper Lambda to handle storing initialization and body statements for both
+ // ForStmt and CXXForRangeStmt and checks for any possible mismatch between
+ // induction variables types
+ QualType BaseInductionVarType;
+ auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType,
+ this, &Context](Stmt *LoopStmt) {
+ if (auto *For = dyn_cast<ForStmt>(LoopStmt)) {
+ OriginalInits.back().push_back(For->getInit());
+ ForStmts.push_back(For);
+ // Extract induction variable
+ if (auto *InitStmt = dyn_cast_or_null<DeclStmt>(For->getInit())) {
+ if (auto *InitDecl = dyn_cast<VarDecl>(InitStmt->getSingleDecl())) {
+ QualType InductionVarType = InitDecl->getType().getCanonicalType();
+
+ // Compare with first loop type
+ if (BaseInductionVarType.isNull()) {
+ BaseInductionVarType = InductionVarType;
+ } else if (!Context.hasSameType(BaseInductionVarType,
+ InductionVarType)) {
+ Diag(InitDecl->getBeginLoc(),
+ diag::warn_omp_different_loop_ind_var_types)
+ << getOpenMPDirectiveName(OMPD_fuse) << BaseInductionVarType
+ << InductionVarType;
+ }
+ }
+ }
+
+ } else {
+ assert(isa<CXXForRangeStmt>(LoopStmt) &&
+ "Expected canonical for or range-based for loops.");
+ auto *CXXFor = dyn_cast<CXXForRangeStmt>(LoopStmt);
+ OriginalInits.back().push_back(CXXFor->getBeginStmt());
+ ForStmts.push_back(CXXFor);
+ }
+ };
+ // Helper lambda functions to encapsulate the processing of different
+ // derivations of the canonical loop sequence grammar
+ //
+ // Modularized code for handling loop generation and transformations
+ auto handleLoopGeneration = [&storeLoopStatements, &LoopHelpers,
+ &OriginalInits, &LoopSeqSize, &NumLoops, Kind,
+ &TmpDSA, &OnTransformationCallback,
+ this](Stmt *Child) {
+ auto LoopTransform = dyn_cast<OMPLoopTransformationDirective>(Child);
+ Stmt *TransformedStmt = LoopTransform->getTransformedStmt();
+ unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests();
+
+ // Handle the case where transformed statement is not available due to
+ // dependent contexts
+ if (!TransformedStmt) {
+ if (NumGeneratedLoopNests > 0)
+ return true;
+ // Unroll full
+ else {
+ Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+ << 0 << getOpenMPDirectiveName(Kind);
+ return false;
+ }
+ }
+ // Handle loop transformations with multiple loop nests
+ // Unroll full
+ if (NumGeneratedLoopNests <= 0) {
+ Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+ << 0 << getOpenMPDirectiveName(Kind);
+ return false;
+ // Future loop transformations that generate multiple canonical loops
+ } else if (NumGeneratedLoopNests > 1) {
+ llvm_unreachable("Multiple canonical loop generating transformations "
+ "like loop splitting are not yet supported");
+ }
+
+ // Process the transformed loop statement
+ Child = TransformedStmt;
+ OriginalInits.emplace_back();
+ LoopHelpers.emplace_back();
+ OnTransformationCallback(LoopTransform);
+
+ unsigned IsCanonical =
+ checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack,
+ TmpDSA, LoopHelpers[LoopSeqSize]);
+
+ if (!IsCanonical) {
+ Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop)
+ << getOpenMPDirectiveName(Kind);
+ return false;
+ }
+ storeLoopStatements(TransformedStmt);
+ NumLoops += LoopTransform->getNumGeneratedLoops();
+ return true;
+ };
+
+ // Modularized code for handling regular canonical loops
+ auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits,
+ &LoopSeqSize, &NumLoops, Kind, &TmpDSA, &NLCV,
+ this](Stmt *Child) {
+ OriginalInits.emplace_back();
+ LoopHelpers.emplace_back();
+ unsigned IsCanonical =
+ checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack,
+ TmpDSA, LoopHelpers[LoopSeqSize]);
+
+ if (!IsCanonical) {
+ Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop)
+ << getOpenMPDirectiveName(Kind);
+ return false;
+ }
+ storeLoopStatements(Child);
+ NumLoops += NLCV.TraverseStmt(Child);
+ return true;
+ };
+
+ // Helper function to process a Loop Sequence Recursively
+ auto handleLoopSequence = [&](Stmt *LoopSeqStmt,
+ auto &handleLoopSequenceCallback) -> bool {
+ for (auto *Child : LoopSeqStmt->children()) {
+ if (!Child)
+ continue;
+
+ // Skip over non-loop-sequence statements
+ if (!isLoopSequenceDerivation(Child)) {
+ Child = Child->IgnoreContainers();
+
+ // Ignore empty compound statement
+ if (!Child)
+ continue;
+
+ // In the case of a nested loop sequence, ignoring containers would not
+ // be enough; a recursive traversal of the loop sequence is required
+ if (isa<CompoundStmt>(Child)) {
+ if (!handleLoopSequenceCallback(Child, handleLoopSequenceCallback))
+ return false;
+ // Already handled, skip this child
+ continue;
+ }
+ }
+ // Regular loop sequence handling
+ if (isLoopSequenceDerivation(Child)) {
+ if (isLoopGeneratingStmt(Child)) {
+ if (!handleLoopGeneration(Child)) {
+ return false;
+ }
+ } else {
+ if (!handleRegularLoop(Child)) {
+ return false;
+ }
+ }
+ ++LoopSeqSize;
+ } else {
+ // Report error for invalid statement inside canonical loop sequence
+ Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+ << 0 << getOpenMPDirectiveName(Kind);
+ return false;
+ }
+ }
+ return true;
+ };
+
+ // Recursive entry point to process the main loop sequence
+ if (!handleLoopSequence(AStmt, handleLoopSequence)) {
+ return false;
+ }
+
+ if (LoopSeqSize <= 0) {
+ Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence)
+ << getOpenMPDirectiveName(Kind);
+ return false;
+ }
+ return true;
+}
+
/// Add preinit statements that need to be propageted from the selected loop.
static void addLoopPreInits(ASTContext &Context,
OMPLoopBasedDirective::HelperExprs &LoopHelper,
@@ -15462,6 +15728,340 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective(
buildPreInits(Context, PreInits));
}
+StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
+ Stmt *AStmt,
+ SourceLocation StartLoc,
+ SourceLocation EndLoc) {
+ ASTContext &Context = getASTContext();
+ DeclContext *CurrContext = SemaRef.CurContext;
+ Scope *CurScope = SemaRef.getCurScope();
+ CaptureVars CopyTransformer(SemaRef);
+
+ // Ensure the structured block is not empty
+ if (!AStmt) {
+ return StmtError();
+ }
+ // Validate that the potential loop sequence is transformable for fusion
+ // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops
+ SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers;
+ SmallVector<Stmt *> LoopStmts;
+ SmallVector<SmallVector<Stmt *, 0>> OriginalInits;
+
+ unsigned NumLoops;
+ // TODO: Support looprange clause using LoopSeqSize
+ unsigned LoopSeqSize;
+ if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops,
+ LoopHelpers, LoopStmts, OriginalInits,
+ Context)) {
+ return StmtError();
+ }
+
+ // Defer transformation in dependent contexts
+ if (CurrContext->isDependentContext()) {
+ return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
+ NumLoops, 1, AStmt, nullptr, nullptr);
+ }
+ assert(LoopHelpers.size() == LoopSeqSize &&
+ "Expecting loop iteration space dimensionality to match number of "
+ "affected loops");
+ assert(OriginalInits.size() == LoopSeqSize &&
+ "Expecting loop iteration space dimensionality to match number of "
+ "affected loops");
+
+ // PreInits hold a sequence of variable declarations that must be executed
+ // before the fused loop begins. These include bounds, strides, and other
+ // helper variables required for the transformation.
+ SmallVector<Stmt *> PreInits;
+
+ // Select the type with the largest bit width among all induction variables
+ QualType IVType = LoopHelpers[0].IterationVarRef->getType();
+ for (unsigned int I = 1; I < LoopSeqSize; ++I) {
+ QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType();
+ if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) {
+ IVType = CurrentIVType;
+ }
+ }
+ uint64_t IVBitWidth = Context.getIntWidth(IVType);
+
+ // Create pre-init declarations for all loops lower bounds, upper bounds,
+ // strides and num-iterations
+ SmallVector<VarDecl *, 4> LBVarDecls;
+ SmallVector<VarDecl *, 4> STVarDecls;
+ SmallVector<VarDecl *, 4> NIVarDecls;
+ SmallVector<VarDecl *, 4> UBVarDecls;
+ SmallVector<VarDecl *, 4> IVVarDecls;
+
+ // Helper lambda to create variables for bounds, strides, and other
+ // expressions. Generates both the variable declaration and the corresponding
+ // initialization statement.
+ auto CreateHelperVarAndStmt =
+ [&SemaRef = this->SemaRef, &Context, &CopyTransformer,
+ &IVType](Expr *ExprToCopy, const std::string &BaseName, unsigned I,
+ bool NeedsNewVD = false) {
+ Expr *TransformedExpr =
+ AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy));
+ if (!TransformedExpr)
+ return std::pair<VarDecl *, StmtResult>(nullptr, StmtError());
+
+ auto Name = (Twine(".omp.") + BaseName + std::to_string(I)).str();
+
+ VarDecl *VD;
+ if (NeedsNewVD) {
+ VD = buildVarDecl(SemaRef, SourceLocation(), IVType, Name);
+ SemaRef.AddInitializerToDecl(VD, TransformedExpr, false);
+
+ } else {
+ // Create a unique variable name
+ DeclRefExpr *DRE = cast<DeclRefExpr>(TransformedExpr);
+ VD = cast<VarDecl>(DRE->getDecl());
+ VD->setDeclName(&SemaRef.PP.getIdentifierTable().get(Name));
+ }
+ // Create the corresponding declaration statement
+ StmtResult DeclStmt = new (Context) class DeclStmt(
+ DeclGroupRef(VD), SourceLocation(), SourceLocation());
+ return std::make_pair(VD, DeclStmt);
+ };
+
+ // Process each single loop to generate and collect declarations
+ // and statements for all helper expressions
+ for (unsigned int I = 0; I < LoopSeqSize; ++I) {
+ addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
+ PreInits);
+
+ auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", I);
+ auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", I);
+ auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", I);
+ auto [NIVD, NIDStmt] =
+ CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", I, true);
+ auto [IVVD, IVDStmt] =
+ CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", I);
+
+ if (!LBVD || !STVD || !NIVD || !IVVD)
+ return StmtError();
+
+ UBVarDecls.push_back(UBVD);
+ LBVarDecls.push_back(LBVD);
+ STVarDecls.push_back(STVD);
+ NIVarDecls.push_back(NIVD);
+ IVVarDecls.push_back(IVVD);
+
+ PreInits.push_back(UBDStmt.get());
+ PreInits.push_back(LBDStmt.get());
+ PreInits.push_back(STDStmt.get());
+ PreInits.push_back(NIDStmt.get());
+ PreInits.push_back(IVDStmt.get());
+ }
+
+ auto MakeVarDeclRef = [&SemaRef = this->SemaRef](VarDecl *VD) {
+ return buildDeclRefExpr(SemaRef, VD, VD->getType(), VD->getLocation(),
+ false);
+ };
+
+ // Next, the final fused loop is created. It has the following shape
+ // (considering the selected loops):
+ //
+ // for (fuse.index = 0; fuse.index < max(ni0, ni1..., nik); ++fuse.index) {
+ // if (fuse.index < ni0){
+ // iv0 = lb0 + st0 * fuse.index;
+ // original.index0 = iv0
+ // body(0);
+ // }
+ // if (fuse.index < ni1){
+ // iv1 = lb1 + st1 * fuse.index;
+ // original.index1 = iv1
+ // body(1);
+ // }
+ //
+ // ...
+ //
+ // if (fuse.index < nik){
+ // ivk = lbk + stk * fuse.index;
+ // original.indexk = ivk
+ // body(k);
+ // }
+
+ // }
+
+ // 1. Create the initialized fuse index
+ const std::string IndexName = Twine(".omp.fuse.index").str();
+ Expr *InitVal = IntegerLiteral::Create(Context, llvm::APInt(IVBitWidth, 0),
+ IVType, SourceLocation());
+ VarDecl *IndexDecl =
+ buildVarDecl(SemaRef, {}, IVType, IndexName, nullptr, nullptr);
+ SemaRef.AddInitializerToDecl(IndexDecl, InitVal, false);
+ StmtResult InitStmt = new (Context)
+ DeclStmt(DeclGroupRef(IndexDecl), SourceLocation(), SourceLocation());
+
+ if (!InitStmt.isUsable())
+ return StmtError();
+
+ auto MakeIVRef = [&SemaRef = this->SemaRef, IndexDecl, IVType,
+ Loc = InitVal->getExprLoc()]() {
+ return buildDeclRefExpr(SemaRef, IndexDecl, IVType, Loc, false);
+ };
+
+ // 2. Iteratively compute the max number of logical iterations Max(NI_1, NI_2,
+ // ..., NI_k)
+ //
+ // This loop accumulates the maximum value across multiple expressions,
+ // ensuring each step constructs a unique AST node for correctness. By using
+ // intermediate temporary variables and conditional operators, we maintain
+ // distinct nodes and avoid duplicating subtrees. For instance, max(a,b,c):
+ // omp.temp.1 = a
+ // omp.temp.2 = omp.temp.1 > b ? omp.temp.1 : b
+ // omp.fuse.max = omp.temp.2 > c ? omp.temp.2 : c
+
+ ExprResult MaxExpr;
+ for (unsigned I = 0; I < LoopSeqSize; ++I) {
+ DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[I]);
+ QualType NITy = NIRef->getType();
+
+ if (MaxExpr.isUnset()) {
+ // Initialize MaxExpr with the first NI expression
+ MaxExpr = NIRef;
+ } else {
+ // Create a new accumulator variable t_i = MaxExpr
+ std::string TempName = (Twine(".omp.temp.") + Twine(I)).str();
+ VarDecl *TempDecl =
+ buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr);
+ TempDecl->setInit(MaxExpr.get());
+ DeclRefExpr *TempRef =
+ buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false);
+ DeclRefExpr *TempRef2 =
+ buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false);
+ // Add a DeclStmt to PreInits to ensure the variable is declared.
+ StmtResult TempStmt = new (Context)
+ DeclStmt(DeclGroupRef(TempDecl), SourceLocation(), SourceLocation());
+
+ if (!TempStmt.isUsable())
+ return StmtError();
+ PreInits.push_back(TempStmt.get());
+
+ // Build MaxExpr <-(MaxExpr > NIRef ? MaxExpr : NIRef)
+ ExprResult Comparison =
+ SemaRef.BuildBinOp(nullptr, SourceLocation(), BO_GT, TempRef, NIRef);
+ // Handle any errors in Comparison creation
+ if (!Comparison.isUsable())
+ return StmtError();
+
+ DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[I]);
+ // Update MaxExpr using a conditional expression to hold the max value
+ MaxExpr = new (Context) ConditionalOperator(
+ Comparison.get(), SourceLocation(), TempRef2, SourceLocation(),
+ NIRef2->getExprStmt(), NITy, VK_LValue, OK_Ordinary);
+
+ if (!MaxExpr.isUsable())
+ return StmtError();
+ }
+ }
+ if (!MaxExpr.isUsable())
+ return StmtError();
+
+ // 3. Declare the max variable
+ const std::string MaxName = Twine(".omp.fuse.max").str();
+ VarDecl *MaxDecl =
+ buildVarDecl(SemaRef, {}, IVType, MaxName, nullptr, nullptr);
+ MaxDecl->setInit(MaxExpr.get());
+ DeclRefExpr *MaxRef = buildDeclRefExpr(SemaRef, MaxDecl, IVType, {}, false);
+ StmtResult MaxStmt = new (Context)
+ DeclStmt(DeclGroupRef(MaxDecl), SourceLocation(), SourceLocation());
+
+ if (MaxStmt.isInvalid())
+ return StmtError();
+ PreInits.push_back(MaxStmt.get());
+
+ // 4. Create condition Expr: index < n_max
+ ExprResult CondExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT,
+ MakeIVRef(), MaxRef);
+ if (!CondExpr.isUsable())
+ return StmtError();
+ // 5. Increment Expr: ++index
+ ExprResult IncrExpr =
+ SemaRef.BuildUnaryOp(CurScope, SourceLocation(), UO_PreInc, MakeIVRef());
+ if (!IncrExpr.isUsable())
+ return StmtError();
+
+ // 6. Build the Fused Loop Body
+ // The final fused loop iterates over the maximum logical range. Inside the
+ // loop, each original loop's index is calculated dynamically, and its body
+ // is executed conditionally.
+ //
+ // Each sub-loop's body is guarded by a conditional statement to ensure
+ // it executes only within its logical iteration range:
+ //
+ // if (fuse.index < ni_k){
+ // iv_k = lb_k + st_k * fuse.index;
+ // original.index = iv_k
+ // body(k);
+ // }
+
+ CompoundStmt *FusedBody = nullptr;
+ SmallVector<Stmt *, 4> FusedBodyStmts;
+ for (unsigned I = 0; I < LoopSeqSize; ++I) {
+
+ // Assignment of the original sub-loop index to compute the logical index
+ // IV_k = LB_k + omp.fuse.index * ST_k
+
+ ExprResult IdxExpr =
+ SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul,
+ MakeVarDeclRef(STVarDecls[I]), MakeIVRef());
+ if (!IdxExpr.isUsable())
+ return StmtError();
+ IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add,
+ MakeVarDeclRef(LBVarDecls[I]), IdxExpr.get());
+
+ if (!IdxExpr.isUsable())
+ return StmtError();
+ IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign,
+ MakeVarDeclRef(IVVarDecls[I]), IdxExpr.get());
+ if (!IdxExpr.isUsable())
+ return StmtError();
+
+ // Update the original i_k = IV_k
+ SmallVector<Stmt *, 4> BodyStmts;
+ BodyStmts.push_back(IdxExpr.get());
+ llvm::append_range(BodyStmts, LoopHelpers[I].Updates);
+
+ if (auto *SourceCXXFor = dyn_cast<CXXForRangeStmt>(LoopStmts[I]))
+ BodyStmts.push_back(SourceCXXFor->getLoopVarStmt());
+
+ Stmt *Body = (isa<ForStmt>(LoopStmts[I]))
+ ? cast<ForStmt>(LoopStmts[I])->getBody()
+ : cast<CXXForRangeStmt>(LoopStmts[I])->getBody();
+
+ BodyStmts.push_back(Body);
+
+ CompoundStmt *CombinedBody =
+ CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(),
+ SourceLocation(), SourceLocation());
+ ExprResult Condition =
+ SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(),
+ MakeVarDeclRef(NIVarDecls[I]));
+
+ if (!Condition.isUsable())
+ return StmtError();
+
+ IfStmt *IfStatement = IfStmt::Create(
+ Context, SourceLocation(), IfStatementKind::Ordinary, nullptr, nullptr,
+ Condition.get(), SourceLocation(), SourceLocation(), CombinedBody,
+ SourceLocation(), nullptr);
+
+ FusedBodyStmts.push_back(IfStatement);
+ }
+ FusedBody = CompoundStmt::Create(Context, FusedBodyStmts, FPOptionsOverride(),
+ SourceLocation(), SourceLocation());
+
+ // 7. Construct the final fused loop
+ ForStmt *FusedForStmt = new (Context)
+ ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, IncrExpr.get(),
+ FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(),
+ IncrExpr.get()->getEndLoc());
+
+ return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops,
+ 1, AStmt, FusedForStmt,
+ buildPreInits(Context, PreInits));
+}
+
OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind,
Expr *Expr,
SourceLocation StartLoc,
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 335e21d927b76..034b0c8243667 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -9666,6 +9666,17 @@ StmtResult TreeTransform<Derived>::TransformOMPInterchangeDirective(
return Res;
}
+template <typename Derived>
+StmtResult
+TreeTransform<Derived>::TransformOMPFuseDirective(OMPFuseDirective *D) {
+ DeclarationNameInfo DirName;
+ getDerived().getSema().OpenMP().StartOpenMPDSABlock(
+ D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc());
+ StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+ getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get());
+ return Res;
+}
+
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPForDirective(OMPForDirective *D) {
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 0ba0378754eb4..6762d11d6b73e 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2449,6 +2449,7 @@ void ASTStmtReader::VisitOMPLoopTransformationDirective(
OMPLoopTransformationDirective *D) {
VisitOMPLoopBasedDirective(D);
D->setNumGeneratedLoops(Record.readUInt32());
+ D->setNumGeneratedLoopNests(Record.readUInt32());
}
void ASTStmtReader::VisitOMPTileDirective(OMPTileDirective *D) {
@@ -2471,6 +2472,10 @@ void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) {
VisitOMPLoopTransformationDirective(D);
}
+void ASTStmtReader::VisitOMPFuseDirective(OMPFuseDirective *D) {
+ VisitOMPLoopTransformationDirective(D);
+}
+
void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) {
VisitOMPLoopDirective(D);
D->setHasCancel(Record.readBool());
@@ -3613,6 +3618,12 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
S = OMPReverseDirective::CreateEmpty(Context);
break;
}
+ case STMT_OMP_FUSE_DIRECTIVE: {
+ unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
+ unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];
+ S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops);
+ break;
+ }
case STMT_OMP_INTERCHANGE_DIRECTIVE: {
unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index b9eabd5ddb64c..8b909d5c93686 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2454,6 +2454,7 @@ void ASTStmtWriter::VisitOMPLoopTransformationDirective(
OMPLoopTransformationDirective *D) {
VisitOMPLoopBasedDirective(D);
Record.writeUInt32(D->getNumGeneratedLoops());
+ Record.writeUInt32(D->getNumGeneratedLoopNests());
}
void ASTStmtWriter::VisitOMPTileDirective(OMPTileDirective *D) {
@@ -2481,6 +2482,11 @@ void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) {
Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE;
}
+void ASTStmtWriter::VisitOMPFuseDirective(OMPFuseDirective *D) {
+ VisitOMPLoopTransformationDirective(D);
+ Code = serialization::STMT_OMP_FUSE_DIRECTIVE;
+}
+
void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) {
VisitOMPLoopDirective(D);
Record.writeBool(D->hasCancel());
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 1afd4b52eb354..036945b2d1700 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1817,6 +1817,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
case Stmt::OMPStripeDirectiveClass:
case Stmt::OMPTileDirectiveClass:
case Stmt::OMPInterchangeDirectiveClass:
+ case Stmt::OMPFuseDirectiveClass:
case Stmt::OMPInteropDirectiveClass:
case Stmt::OMPDispatchDirectiveClass:
case Stmt::OMPMaskedDirectiveClass:
diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp
new file mode 100644
index 0000000000000..43ce815dab024
--- /dev/null
+++ b/clang/test/OpenMP/fuse_ast_print.cpp
@@ -0,0 +1,278 @@
+// Check no warnings/errors
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+// Check AST and unparsing
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -emit-pch -o %t %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code
+extern "C" void body(...);
+
+// PRINT-LABEL: void foo1(
+// DUMP-LABEL: FunctionDecl {{.*}} foo1
+void foo1() {
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+
+ }
+
+}
+
+// PRINT-LABEL: void foo2(
+// DUMP-LABEL: FunctionDecl {{.*}} foo2
+void foo2() {
+ // PRINT: #pragma omp unroll partial(4)
+ // DUMP: OMPUnrollDirective
+ // DUMP-NEXT: OMPPartialClause
+ // DUMP-NEXT: ConstantExpr
+ // DUMP-NEXT: value: Int 4
+ // DUMP-NEXT: IntegerLiteral {{.*}} 4
+ #pragma omp unroll partial(4)
+ // PRINT: #pragma omp fuse
+ // DUMP-NEXT: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ }
+
+}
+
+//PRINT-LABEL: void foo3(
+//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo3
+template<int Factor1, int Factor2>
+void foo3() {
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: #pragma omp unroll partial(Factor1)
+ // DUMP: OMPUnrollDirective
+ #pragma omp unroll partial(Factor1)
+ // PRINT: for (int i = 0; i < 12; i += 1)
+ // DUMP: ForStmt
+ for (int i = 0; i < 12; i += 1)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: #pragma omp unroll partial(Factor2)
+ // DUMP: OMPUnrollDirective
+ #pragma omp unroll partial(Factor2)
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+
+ }
+}
+
+// Also test instantiating the template.
+void tfoo3() {
+ foo3<4,2>();
+}
+
+//PRINT-LABEL: void foo4(
+//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo4
+template<typename T, T Step>
+void foo4(int start, int end) {
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (T i = start; i < end; i += Step)
+ // DUMP: ForStmt
+ for (T i = start; i < end; i += Step)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+
+ // PRINT: for (T j = end; j > start; j -= Step)
+ // DUMP: ForStmt
+ for (T j = end; j > start; j -= Step) {
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ }
+
+ }
+}
+
+// Also test instantiating the template.
+void tfoo4() {
+ foo4<int, 4>(0, 64);
+}
+
+
+
+// PRINT-LABEL: void foo5(
+// DUMP-LABEL: FunctionDecl {{.*}} foo5
+void foo5() {
+ double arr[128], arr2[128];
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT-NEXT: for (auto &&a : arr)
+ // DUMP-NEXT: CXXForRangeStmt
+ for (auto &&a: arr)
+ // PRINT: body(a)
+ // DUMP: CallExpr
+ body(a);
+ // PRINT: for (double v = 42; auto &&b : arr)
+ // DUMP: CXXForRangeStmt
+ for (double v = 42; auto &&b: arr)
+ // PRINT: body(b, v);
+ // DUMP: CallExpr
+ body(b, v);
+ // PRINT: for (auto &&c : arr2)
+ // DUMP: CXXForRangeStmt
+ for (auto &&c: arr2)
+ // PRINT: body(c)
+ // DUMP: CallExpr
+ body(c);
+
+ }
+
+}
+
+// PRINT-LABEL: void foo6(
+// DUMP-LABEL: FunctionDecl {{.*}} foo6
+void foo6() {
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i <= 10; ++i)
+ // DUMP: ForStmt
+ for (int i = 0; i <= 10; ++i)
+ body(i);
+ // PRINT: for (int j = 0; j < 100; ++j)
+ // DUMP: ForStmt
+ for(int j = 0; j < 100; ++j)
+ body(j);
+ }
+ // PRINT: #pragma omp unroll partial(4)
+ // DUMP: OMPUnrollDirective
+ #pragma omp unroll partial(4)
+ // PRINT: for (int k = 0; k < 250; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k < 250; ++k)
+ body(k);
+ }
+}
+
+// PRINT-LABEL: void foo7(
+// DUMP-LABEL: FunctionDecl {{.*}} foo7
+void foo7() {
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ }
+ }
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+ }
+ }
+ }
+ }
+
+}
+
+
+
+
+
+#endif
\ No newline at end of file
diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp
new file mode 100644
index 0000000000000..6c1e21092da43
--- /dev/null
+++ b/clang/test/OpenMP/fuse_codegen.cpp
@@ -0,0 +1,1511 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5
+// expected-no-diagnostics
+
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2
+
+#ifndef HEADER
+#define HEADER
+
+//placeholder for loop body code.
+extern "C" void body(...) {}
+
+extern "C" void foo1(int start1, int end1, int step1, int start2, int end2, int step2) {
+ int i,j;
+ #pragma omp fuse
+ {
+ for(i = start1; i < end1; i += step1) body(i);
+ for(j = start2; j < end2; j += step2) body(j);
+ }
+
+}
+
+template <typename T>
+void foo2(T start, T end, T step){
+ T i,j,k;
+ #pragma omp fuse
+ {
+ for(i = start; i < end; i += step) body(i);
+ for(j = end; j > start; j -= step) body(j);
+ for(k = start+step; k < end+step; k += step) body(k);
+ }
+}
+
+extern "C" void tfoo2() {
+ foo2<int>(0, 64, 4);
+}
+
+extern "C" void foo3() {
+ double arr[256];
+ #pragma omp fuse
+ {
+ #pragma omp fuse
+ {
+ for(int i = 0; i < 128; ++i) body(i);
+ for(int j = 0; j < 256; j+=2) body(j);
+ }
+ for(int c = 42; auto &&v: arr) body(c,v);
+ for(int cc = 37; auto &&vv: arr) body(cc, vv);
+ }
+}
+
+
+#endif
+// CHECK1-LABEL: define dso_local void @body(
+// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo1(
+// CHECK1-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]]
+// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]]
+// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]]
+// CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP35]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]]
+// CHECK1-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
+// CHECK1: [[IF_THEN22]]:
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]]
+// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]]
+// CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK1-NEXT: br label %[[IF_END27]]
+// CHECK1: [[IF_END27]]:
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @tfoo2(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4)
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_(
+// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]]
+// CHECK1-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1
+// CHECK1-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1
+// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]]
+// CHECK1-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
+// CHECK1: [[COND_TRUE30]]:
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT: br label %[[COND_END32:.*]]
+// CHECK1: [[COND_FALSE31]]:
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: br label %[[COND_END32]]
+// CHECK1: [[COND_END32]]:
+// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ]
+// CHECK1-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]]
+// CHECK1-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]]
+// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]]
+// CHECK1-NEXT: store i32 [[ADD38]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
+// CHECK1: [[IF_THEN40]]:
+// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]]
+// CHECK1-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]]
+// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]]
+// CHECK1-NEXT: store i32 [[SUB44]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP61]])
+// CHECK1-NEXT: br label %[[IF_END45]]
+// CHECK1: [[IF_END45]]:
+// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]]
+// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK1: [[IF_THEN47]]:
+// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]]
+// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
+// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]]
+// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]]
+// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP70]])
+// CHECK1-NEXT: br label %[[IF_END52]]
+// CHECK1: [[IF_END52]]:
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo3(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END224:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8
+// CHECK1-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64
+// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1
+// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1
+// CHECK1-NEXT: [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1
+// CHECK1-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1
+// CHECK1-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8
+// CHECK1-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8
+// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64
+// CHECK1-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64
+// CHECK1-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]]
+// CHECK1-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8
+// CHECK1-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1
+// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1
+// CHECK1-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1
+// CHECK1-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1
+// CHECK1-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK1-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1
+// CHECK1-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
+// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]]
+// CHECK1-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]]
+// CHECK1: [[COND_TRUE44]]:
+// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK1-NEXT: br label %[[COND_END46:.*]]
+// CHECK1: [[COND_FALSE45]]:
+// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT: br label %[[COND_END46]]
+// CHECK1: [[COND_END46]]:
+// CHECK1-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ]
+// CHECK1-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]]
+// CHECK1-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]]
+// CHECK1: [[COND_TRUE50]]:
+// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: br label %[[COND_END52:.*]]
+// CHECK1: [[COND_FALSE51]]:
+// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: br label %[[COND_END52]]
+// CHECK1: [[COND_END52]]:
+// CHECK1-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ]
+// CHECK1-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8
+// CHECK1-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
+// CHECK1-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]]
+// CHECK1-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4
+// CHECK1-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4
+// CHECK1-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64
+// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]]
+// CHECK1-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]]
+// CHECK1-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32
+// CHECK1-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4
+// CHECK1-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1
+// CHECK1-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]]
+// CHECK1-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN64]]:
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]]
+// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]]
+// CHECK1-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1
+// CHECK1-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]]
+// CHECK1-NEXT: store i32 [[ADD68]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP48]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]]
+// CHECK1-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]]
+// CHECK1: [[IF_THEN70]]:
+// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]]
+// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]]
+// CHECK1-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2
+// CHECK1-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]]
+// CHECK1-NEXT: store i32 [[ADD74]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP55]])
+// CHECK1-NEXT: br label %[[IF_END75]]
+// CHECK1: [[IF_END75]]:
+// CHECK1-NEXT: br label %[[IF_END76]]
+// CHECK1: [[IF_END76]]:
+// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]]
+// CHECK1: [[IF_THEN78]]:
+// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8
+// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8
+// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]]
+// CHECK1-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]]
+// CHECK1-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8
+// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8
+// CHECK1-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1
+// CHECK1-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]]
+// CHECK1-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: store ptr [[TMP63]], ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP64]], double noundef [[TMP66]])
+// CHECK1-NEXT: br label %[[IF_END83]]
+// CHECK1: [[IF_END83]]:
+// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]]
+// CHECK1-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]]
+// CHECK1: [[IF_THEN85]]:
+// CHECK1-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK1-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]]
+// CHECK1-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]]
+// CHECK1-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1
+// CHECK1-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]]
+// CHECK1-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8
+// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8
+// CHECK1-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP75]], double noundef [[TMP77]])
+// CHECK1-NEXT: br label %[[IF_END90]]
+// CHECK1: [[IF_END90]]:
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1
+// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @body(
+// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo1(
+// CHECK2-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]]
+// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]]
+// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]]
+// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]]
+// CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP35]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]]
+// CHECK2-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
+// CHECK2: [[IF_THEN22]]:
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]]
+// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]]
+// CHECK2-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]]
+// CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK2-NEXT: br label %[[IF_END27]]
+// CHECK2: [[IF_END27]]:
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo3(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END224:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8
+// CHECK2-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64
+// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1
+// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1
+// CHECK2-NEXT: [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1
+// CHECK2-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8
+// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8
+// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1
+// CHECK2-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8
+// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8
+// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8
+// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8
+// CHECK2-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8
+// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8
+// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64
+// CHECK2-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]]
+// CHECK2-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8
+// CHECK2-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1
+// CHECK2-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1
+// CHECK2-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1
+// CHECK2-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1
+// CHECK2-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK2-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK2-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1
+// CHECK2-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
+// CHECK2-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]]
+// CHECK2-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]]
+// CHECK2: [[COND_TRUE44]]:
+// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK2-NEXT: br label %[[COND_END46:.*]]
+// CHECK2: [[COND_FALSE45]]:
+// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT: br label %[[COND_END46]]
+// CHECK2: [[COND_END46]]:
+// CHECK2-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ]
+// CHECK2-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]]
+// CHECK2-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]]
+// CHECK2: [[COND_TRUE50]]:
+// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: br label %[[COND_END52:.*]]
+// CHECK2: [[COND_FALSE51]]:
+// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: br label %[[COND_END52]]
+// CHECK2: [[COND_END52]]:
+// CHECK2-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ]
+// CHECK2-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8
+// CHECK2-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
+// CHECK2-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]]
+// CHECK2-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4
+// CHECK2-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4
+// CHECK2-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64
+// CHECK2-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]]
+// CHECK2-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]]
+// CHECK2-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32
+// CHECK2-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4
+// CHECK2-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1
+// CHECK2-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]]
+// CHECK2-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN64]]:
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]]
+// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]]
+// CHECK2-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1
+// CHECK2-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]]
+// CHECK2-NEXT: store i32 [[ADD68]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP48]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]]
+// CHECK2-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]]
+// CHECK2: [[IF_THEN70]]:
+// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]]
+// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]]
+// CHECK2-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2
+// CHECK2-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]]
+// CHECK2-NEXT: store i32 [[ADD74]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP55]])
+// CHECK2-NEXT: br label %[[IF_END75]]
+// CHECK2: [[IF_END75]]:
+// CHECK2-NEXT: br label %[[IF_END76]]
+// CHECK2: [[IF_END76]]:
+// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]]
+// CHECK2: [[IF_THEN78]]:
+// CHECK2-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8
+// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8
+// CHECK2-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]]
+// CHECK2-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]]
+// CHECK2-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8
+// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK2-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8
+// CHECK2-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1
+// CHECK2-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]]
+// CHECK2-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: store ptr [[TMP63]], ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP64]], double noundef [[TMP66]])
+// CHECK2-NEXT: br label %[[IF_END83]]
+// CHECK2: [[IF_END83]]:
+// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]]
+// CHECK2-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]]
+// CHECK2: [[IF_THEN85]]:
+// CHECK2-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]]
+// CHECK2-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]]
+// CHECK2-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1
+// CHECK2-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]]
+// CHECK2-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8
+// CHECK2-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8
+// CHECK2-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP75]], double noundef [[TMP77]])
+// CHECK2-NEXT: br label %[[IF_END90]]
+// CHECK2: [[IF_END90]]:
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1
+// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @tfoo2(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4)
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_(
+// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK2-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]]
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]]
+// CHECK2-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1
+// CHECK2-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1
+// CHECK2-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]]
+// CHECK2-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
+// CHECK2: [[COND_TRUE30]]:
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: br label %[[COND_END32:.*]]
+// CHECK2: [[COND_FALSE31]]:
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: br label %[[COND_END32]]
+// CHECK2: [[COND_END32]]:
+// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ]
+// CHECK2-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]]
+// CHECK2-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]]
+// CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]]
+// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]]
+// CHECK2-NEXT: store i32 [[ADD38]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
+// CHECK2: [[IF_THEN40]]:
+// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]]
+// CHECK2-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]]
+// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]]
+// CHECK2-NEXT: store i32 [[SUB44]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP61]])
+// CHECK2-NEXT: br label %[[IF_END45]]
+// CHECK2: [[IF_END45]]:
+// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]]
+// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK2: [[IF_THEN47]]:
+// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]]
+// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]]
+// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]]
+// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP70]])
+// CHECK2-NEXT: br label %[[IF_END52]]
+// CHECK2: [[IF_END52]]:
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: ret void
+//
+//.
+// CHECK1: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"}
+// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
+// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+//.
+// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"}
+// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
+// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+//.
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp
new file mode 100644
index 0000000000000..50dedfd2c0dc6
--- /dev/null
+++ b/clang/test/OpenMP/fuse_messages.cpp
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s
+
+void func() {
+
+ // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}}
+ #pragma omp fuse
+ ;
+
+ // expected-error@+2 {{statement after '#pragma omp fuse' must be a for loop}}
+ #pragma omp fuse
+ {int bar = 0;}
+
+ // expected-error@+4 {{statement after '#pragma omp fuse' must be a for loop}}
+ #pragma omp fuse
+ {
+ for(int i = 0; i < 10; ++i);
+ int x = 2;
+ }
+
+ // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}}
+ #pragma omp fuse
+ #pragma omp for
+ for (int i = 0; i < 7; ++i)
+ ;
+
+ {
+ // expected-error@+2 {{expected statement}}
+ #pragma omp fuse
+ }
+
+ // expected-warning@+1 {{extra tokens at the end of '#pragma omp fuse' are ignored}}
+ #pragma omp fuse foo
+ {
+ for (int i = 0; i < 7; ++i)
+ ;
+ }
+
+
+ // expected-error@+1 {{unexpected OpenMP clause 'final' in directive '#pragma omp fuse'}}
+ #pragma omp fuse final(0)
+ {
+ for (int i = 0; i < 7; ++i)
+ ;
+ }
+
+ //expected-error@+4 {{loop after '#pragma omp fuse' is not in canonical form}}
+ //expected-error@+3 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'i'}}
+ #pragma omp fuse
+ {
+ for(int i = 0; i < 10; i*=2) {
+ ;
+ }
+ }
+
+ //expected-error@+2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}}
+ #pragma omp fuse
+ {}
+
+ //expected-error@+3 {{statement after '#pragma omp fuse' must be a for loop}}
+ #pragma omp fuse
+ {
+ #pragma omp unroll full
+ for(int i = 0; i < 10; ++i);
+
+ for(int j = 0; j < 10; ++j);
+ }
+
+ //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'unsigned int'}}
+ //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'long long'}}
+ #pragma omp fuse
+ {
+ for(int i = 0; i < 10; ++i);
+ for(unsigned int j = 0; j < 10; ++j);
+ for(long long k = 0; k < 100; ++k);
+ }
+}
\ No newline at end of file
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 06a17006fdee9..fd788ac3d69d4 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2206,6 +2206,7 @@ class EnqueueVisitor : public ConstStmtVisitor<EnqueueVisitor, void>,
void VisitOMPUnrollDirective(const OMPUnrollDirective *D);
void VisitOMPReverseDirective(const OMPReverseDirective *D);
void VisitOMPInterchangeDirective(const OMPInterchangeDirective *D);
+ void VisitOMPFuseDirective(const OMPFuseDirective *D);
void VisitOMPForDirective(const OMPForDirective *D);
void VisitOMPForSimdDirective(const OMPForSimdDirective *D);
void VisitOMPSectionsDirective(const OMPSectionsDirective *D);
@@ -3364,6 +3365,10 @@ void EnqueueVisitor::VisitOMPInterchangeDirective(
VisitOMPLoopTransformationDirective(D);
}
+void EnqueueVisitor::VisitOMPFuseDirective(const OMPFuseDirective *D) {
+ VisitOMPLoopTransformationDirective(D);
+}
+
void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) {
VisitOMPLoopDirective(D);
}
@@ -6317,6 +6322,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) {
return cxstring::createRef("OMPReverseDirective");
case CXCursor_OMPInterchangeDirective:
return cxstring::createRef("OMPInterchangeDirective");
+ case CXCursor_OMPFuseDirective:
+ return cxstring::createRef("OMPFuseDirective");
case CXCursor_OMPForDirective:
return cxstring::createRef("OMPForDirective");
case CXCursor_OMPForSimdDirective:
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index 635d03a88d105..709fa60d28d8d 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -688,6 +688,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
case Stmt::OMPInterchangeDirectiveClass:
K = CXCursor_OMPInterchangeDirective;
break;
+ case Stmt::OMPFuseDirectiveClass:
+ K = CXCursor_OMPFuseDirective;
+ break;
case Stmt::OMPForDirectiveClass:
K = CXCursor_OMPForDirective;
break;
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 0af4b436649a3..8286cfcadaafd 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -852,6 +852,10 @@ def OMP_For : Directive<"for"> {
let category = CA_Executable;
let languages = [L_C];
}
+def OMP_Fuse : Directive<"fuse"> {
+ let association = AS_Loop;
+ let category = CA_Executable;
+}
def OMP_Interchange : Directive<"interchange"> {
let allowedOnceClauses = [
VersionedClause<OMPC_Permutation>,
diff --git a/openmp/runtime/test/transform/fuse/foreach.cpp b/openmp/runtime/test/transform/fuse/foreach.cpp
new file mode 100644
index 0000000000000..cabf4bf8a511d
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/foreach.cpp
@@ -0,0 +1,192 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdarg>
+#include <cstdio>
+#include <vector>
+
+struct Reporter {
+ const char *name;
+
+ Reporter(const char *name) : name(name) { print("ctor"); }
+
+ Reporter() : name("<anon>") { print("ctor"); }
+
+ Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+ Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+ ~Reporter() { print("dtor"); }
+
+ const Reporter &operator=(const Reporter &that) {
+ print("copy assign");
+ this->name = that.name;
+ return *this;
+ }
+
+ const Reporter &operator=(Reporter &&that) {
+ print("move assign");
+ this->name = that.name;
+ return *this;
+ }
+
+ struct Iterator {
+ const Reporter *owner;
+ int pos;
+
+ Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+ Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+ owner->print("iterator copy ctor");
+ }
+
+ Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+ owner->print("iterator move ctor");
+ }
+
+ ~Iterator() { owner->print("iterator dtor"); }
+
+ const Iterator &operator=(const Iterator &that) {
+ owner->print("iterator copy assign");
+ this->owner = that.owner;
+ this->pos = that.pos;
+ return *this;
+ }
+
+ const Iterator &operator=(Iterator &&that) {
+ owner->print("iterator move assign");
+ this->owner = that.owner;
+ this->pos = that.pos;
+ return *this;
+ }
+
+ bool operator==(const Iterator &that) const {
+ owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+ return this->pos == that.pos;
+ }
+
+ Iterator &operator++() {
+ owner->print("iterator prefix ++");
+ pos -= 1;
+ return *this;
+ }
+
+ Iterator operator++(int) {
+ owner->print("iterator postfix ++");
+ auto result = *this;
+ pos -= 1;
+ return result;
+ }
+
+ int operator*() const {
+ int result = 2 - pos;
+ owner->print("iterator deref: %i", result);
+ return result;
+ }
+
+ size_t operator-(const Iterator &that) const {
+ int result = (2 - this->pos) - (2 - that.pos);
+ owner->print("iterator distance: %d", result);
+ return result;
+ }
+
+ Iterator operator+(int steps) const {
+ owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+ return Iterator(owner, pos - steps);
+ }
+
+ void print(const char *msg) const { owner->print(msg); }
+ };
+
+ Iterator begin() const {
+ print("begin()");
+ return Iterator(this, 2);
+ }
+
+ Iterator end() const {
+ print("end()");
+ return Iterator(this, -1);
+ }
+
+ void print(const char *msg, ...) const {
+ va_list args;
+ va_start(args, msg);
+ printf("[%s] ", name);
+ vprintf(msg, args);
+ printf("\n");
+ va_end(args);
+ }
+};
+
+int main() {
+ printf("do\n");
+#pragma omp fuse
+ {
+ for (Reporter a{"C"}; auto &&v : Reporter("A"))
+ printf("v=%d\n", v);
+ for (Reporter aa{"D"}; auto &&vv : Reporter("B"))
+ printf("vv=%d\n", vv);
+ }
+ printf("done\n");
+ return EXIT_SUCCESS;
+}
+
+// CHECK: [C] ctor
+// CHECK-NEXT: [A] ctor
+// CHECK-NEXT: [A] end()
+// CHECK-NEXT: [A] begin()
+// CHECK-NEXT: [A] begin()
+// CHECK-NEXT: [A] iterator distance: 3
+// CHECK-NEXT: [D] ctor
+// CHECK-NEXT: [B] ctor
+// CHECK-NEXT: [B] end()
+// CHECK-NEXT: [B] begin()
+// CHECK-NEXT: [B] begin()
+// CHECK-NEXT: [B] iterator distance: 3
+// CHECK-NEXT: [A] iterator advance: 0 += 0
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 0
+// CHECK-NEXT: v=0
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 0
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 0
+// CHECK-NEXT: vv=0
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator advance: 0 += 1
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 1
+// CHECK-NEXT: v=1
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 1
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 1
+// CHECK-NEXT: vv=1
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator advance: 0 += 2
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 2
+// CHECK-NEXT: v=2
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 2
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 2
+// CHECK-NEXT: vv=2
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] dtor
+// CHECK-NEXT: [D] dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] dtor
+// CHECK-NEXT: [C] dtor
+// CHECK-NEXT: done
+
+
+#endif
diff --git a/openmp/runtime/test/transform/fuse/intfor.c b/openmp/runtime/test/transform/fuse/intfor.c
new file mode 100644
index 0000000000000..b8171b4df7042
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/intfor.c
@@ -0,0 +1,50 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+ printf("do\n");
+#pragma omp fuse
+ {
+ for (int i = 5; i <= 25; i += 5)
+ printf("i=%d\n", i);
+ for (int j = 10; j < 100; j += 10)
+ printf("j=%d\n", j);
+ for (int k = 10; k > 0; --k)
+ printf("k=%d\n", k);
+ }
+ printf("done\n");
+ return EXIT_SUCCESS;
+}
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: i=5
+// CHECK-NEXT: j=10
+// CHECK-NEXT: k=10
+// CHECK-NEXT: i=10
+// CHECK-NEXT: j=20
+// CHECK-NEXT: k=9
+// CHECK-NEXT: i=15
+// CHECK-NEXT: j=30
+// CHECK-NEXT: k=8
+// CHECK-NEXT: i=20
+// CHECK-NEXT: j=40
+// CHECK-NEXT: k=7
+// CHECK-NEXT: i=25
+// CHECK-NEXT: j=50
+// CHECK-NEXT: k=6
+// CHECK-NEXT: j=60
+// CHECK-NEXT: k=5
+// CHECK-NEXT: j=70
+// CHECK-NEXT: k=4
+// CHECK-NEXT: j=80
+// CHECK-NEXT: k=3
+// CHECK-NEXT: j=90
+// CHECK-NEXT: k=2
+// CHECK-NEXT: k=1
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/fuse/iterfor.cpp b/openmp/runtime/test/transform/fuse/iterfor.cpp
new file mode 100644
index 0000000000000..552484b2981c4
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/iterfor.cpp
@@ -0,0 +1,194 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdarg>
+#include <cstdio>
+#include <vector>
+
+struct Reporter { // Test helper: logs its special member functions and iterator operations to stdout.
+ const char *name; // Tag printed in brackets at the start of every log line.
+
+ Reporter(const char *name) : name(name) { print("ctor"); }
+
+ Reporter() : name("<anon>") { print("ctor"); }
+
+ Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+ Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+ ~Reporter() { print("dtor"); }
+
+ const Reporter &operator=(const Reporter &that) {
+ print("copy assign"); // Logged before the name is overwritten, so the old name is printed.
+ this->name = that.name;
+ return *this;
+ }
+
+ const Reporter &operator=(Reporter &&that) {
+ print("move assign"); // Logged before the name is overwritten, so the old name is printed.
+ this->name = that.name;
+ return *this;
+ }
+
+ struct Iterator { // Walks the logical values 0..2 of its owner and logs every operation through it.
+ const Reporter *owner; // Reporter whose name tags this iterator's log lines.
+ int pos; // Counts down from 2 (begin) to -1 (end); the logical index is 2 - pos.
+
+ Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} // The only constructor that does not log.
+
+ Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+ owner->print("iterator copy ctor");
+ }
+
+ Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+ owner->print("iterator move ctor");
+ }
+
+ ~Iterator() { owner->print("iterator dtor"); }
+
+ const Iterator &operator=(const Iterator &that) {
+ owner->print("iterator copy assign"); // Logged through the owner held before the assignment.
+ this->owner = that.owner;
+ this->pos = that.pos;
+ return *this;
+ }
+
+ const Iterator &operator=(Iterator &&that) {
+ owner->print("iterator move assign"); // Logged through the owner held before the assignment.
+ this->owner = that.owner;
+ this->pos = that.pos;
+ return *this;
+ }
+
+ bool operator==(const Iterator &that) const {
+ owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); // Logs logical indices, compares raw pos.
+ return this->pos == that.pos;
+ }
+
+ bool operator!=(const Iterator &that) const {
+ owner->print("iterator %d != %d", 2 - this->pos, 2 - that.pos); // Logs logical indices, compares raw pos.
+ return this->pos != that.pos;
+ }
+
+ Iterator &operator++() {
+ owner->print("iterator prefix ++");
+ pos -= 1; // Moving forward decrements pos.
+ return *this;
+ }
+
+ Iterator operator++(int) {
+ owner->print("iterator postfix ++");
+ auto result = *this; // Copy of the pre-increment state; this also logs "iterator copy ctor".
+ pos -= 1;
+ return result;
+ }
+
+ int operator*() const {
+ int result = 2 - pos; // Logical index is the dereferenced value.
+ owner->print("iterator deref: %i", result);
+ return result;
+ }
+
+ size_t operator-(const Iterator &that) const {
+ int result = (2 - this->pos) - (2 - that.pos); // Difference of logical indices.
+ owner->print("iterator distance: %d", result);
+ return result; // The int is converted to size_t on return.
+ }
+
+ Iterator operator+(int steps) const {
+ owner->print("iterator advance: %i += %i", 2 - this->pos, steps); // Logged as "+=" although *this is not modified.
+ return Iterator(owner, pos - steps);
+ }
+ };
+
+ Iterator begin() const {
+ print("begin()");
+ return Iterator(this, 2); // pos 2 is logical index 0.
+ }
+
+ Iterator end() const {
+ print("end()");
+ return Iterator(this, -1); // pos -1 is logical index 3, one past the last value.
+ }
+
+ void print(const char *msg, ...) const { // printf-style logger: "[name] ", the formatted message, then a newline.
+ va_list args;
+ va_start(args, msg);
+ printf("[%s] ", name);
+ vprintf(msg, args);
+ printf("\n");
+ va_end(args);
+ }
+};
+
+int main() { // Runtime test: fuse two iterator-based loops over separate Reporter objects.
+ printf("do\n");
+ Reporter C("C");
+ Reporter D("D");
+#pragma omp fuse
+ { // Loop sequence the fuse directive applies to.
+ for (auto it = C.begin(); it != C.end(); ++it) // Visits the values 0, 1, 2 of C.
+ printf("v=%d\n", *it);
+
+ for (auto it = D.begin(); it != D.end(); ++it) // Visits the values 0, 1, 2 of D.
+ printf("vv=%d\n", *it);
+ } // Expected output alternates one "v=" line and one "vv=" line per fused iteration.
+ printf("done\n");
+ return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK: [C] ctor
+// CHECK-NEXT: [D] ctor
+// CHECK-NEXT: [C] begin()
+// CHECK-NEXT: [C] begin()
+// CHECK-NEXT: [C] end()
+// CHECK-NEXT: [C] iterator distance: 3
+// CHECK-NEXT: [D] begin()
+// CHECK-NEXT: [D] begin()
+// CHECK-NEXT: [D] end()
+// CHECK-NEXT: [D] iterator distance: 3
+// CHECK-NEXT: [C] iterator advance: 0 += 0
+// CHECK-NEXT: [C] iterator move assign
+// CHECK-NEXT: [C] iterator deref: 0
+// CHECK-NEXT: v=0
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] iterator advance: 0 += 0
+// CHECK-NEXT: [D] iterator move assign
+// CHECK-NEXT: [D] iterator deref: 0
+// CHECK-NEXT: vv=0
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator advance: 0 += 1
+// CHECK-NEXT: [C] iterator move assign
+// CHECK-NEXT: [C] iterator deref: 1
+// CHECK-NEXT: v=1
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] iterator advance: 0 += 1
+// CHECK-NEXT: [D] iterator move assign
+// CHECK-NEXT: [D] iterator deref: 1
+// CHECK-NEXT: vv=1
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator advance: 0 += 2
+// CHECK-NEXT: [C] iterator move assign
+// CHECK-NEXT: [C] iterator deref: 2
+// CHECK-NEXT: v=2
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] iterator advance: 0 += 2
+// CHECK-NEXT: [D] iterator move assign
+// CHECK-NEXT: [D] iterator deref: 2
+// CHECK-NEXT: vv=2
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: done
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] dtor
+// CHECK-NEXT: [C] dtor
diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp
new file mode 100644
index 0000000000000..e9f76713fe3e0
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp
@@ -0,0 +1,208 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdarg>
+#include <cstdio>
+#include <vector>
+
+struct Reporter { // Test helper: logs its special member functions and iterator operations to stdout.
+ const char *name; // Tag printed in brackets at the start of every log line.
+
+ Reporter(const char *name) : name(name) { print("ctor"); }
+
+ Reporter() : name("<anon>") { print("ctor"); }
+
+ Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+ Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+ ~Reporter() { print("dtor"); }
+
+ const Reporter &operator=(const Reporter &that) {
+ print("copy assign"); // Logged before the name is overwritten, so the old name is printed.
+ this->name = that.name;
+ return *this;
+ }
+
+ const Reporter &operator=(Reporter &&that) {
+ print("move assign"); // Logged before the name is overwritten, so the old name is printed.
+ this->name = that.name;
+ return *this;
+ }
+
+ struct Iterator { // Walks the logical values 0..2 of its owner and logs every operation through it.
+ const Reporter *owner; // Reporter whose name tags this iterator's log lines.
+ int pos; // Counts down from 2 (begin) to -1 (end); the logical index is 2 - pos.
+
+ Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} // The only constructor that does not log.
+
+ Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+ owner->print("iterator copy ctor");
+ }
+
+ Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+ owner->print("iterator move ctor");
+ }
+
+ ~Iterator() { owner->print("iterator dtor"); }
+
+ const Iterator &operator=(const Iterator &that) {
+ owner->print("iterator copy assign"); // Logged through the owner held before the assignment.
+ this->owner = that.owner;
+ this->pos = that.pos;
+ return *this;
+ }
+
+ const Iterator &operator=(Iterator &&that) {
+ owner->print("iterator move assign"); // Logged through the owner held before the assignment.
+ this->owner = that.owner;
+ this->pos = that.pos;
+ return *this;
+ }
+
+ bool operator==(const Iterator &that) const { // No operator!= is declared in this variant of the helper.
+ owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+ return this->pos == that.pos;
+ }
+
+ Iterator &operator++() {
+ owner->print("iterator prefix ++");
+ pos -= 1; // Moving forward decrements pos.
+ return *this;
+ }
+
+ Iterator operator++(int) {
+ owner->print("iterator postfix ++");
+ auto result = *this; // Copy of the pre-increment state; this also logs "iterator copy ctor".
+ pos -= 1;
+ return result;
+ }
+
+ int operator*() const {
+ int result = 2 - pos; // Logical index is the dereferenced value.
+ owner->print("iterator deref: %i", result);
+ return result;
+ }
+
+ size_t operator-(const Iterator &that) const {
+ int result = (2 - this->pos) - (2 - that.pos); // Difference of logical indices.
+ owner->print("iterator distance: %d", result);
+ return result; // The int is converted to size_t on return.
+ }
+
+ Iterator operator+(int steps) const {
+ owner->print("iterator advance: %i += %i", 2 - this->pos, steps); // Logged as "+=" although *this is not modified.
+ return Iterator(owner, pos - steps);
+ }
+
+ void print(const char *msg) const { owner->print(msg); } // Forwards to the owner's logger; msg is used as the format string.
+ };
+
+ Iterator begin() const {
+ print("begin()");
+ return Iterator(this, 2); // pos 2 is logical index 0.
+ }
+
+ Iterator end() const {
+ print("end()");
+ return Iterator(this, -1); // pos -1 is logical index 3, one past the last value.
+ }
+
+ void print(const char *msg, ...) const { // printf-style logger: "[name] ", the formatted message, then a newline.
+ va_list args;
+ va_start(args, msg);
+ printf("[%s] ", name);
+ vprintf(msg, args);
+ printf("\n");
+ va_end(args);
+ }
+};
+
+int main() { // Runtime test: fuse nested inside a collapse(2) worksharing loop.
+ printf("do\n");
+#pragma omp parallel for collapse(2) num_threads(1)
+ for (int i = 0; i < 3; ++i) // Outer loop; collapse(2) pairs it with the loop that follows.
+#pragma omp fuse
+ { // Loop sequence the fuse directive applies to.
+ for (Reporter c{"init-stmt"}; auto &&v : Reporter("range")) // Range-based for with an init-statement over a temporary Reporter.
+ printf("i=%d v=%d\n", i, v);
+ for (int vv = 0; vv < 3; ++vv) // 3 iterations, matching the 3 values the Reporter range yields.
+ printf("i=%d vv=%d\n", i, vv);
+ }
+ printf("done\n");
+ return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: [init-stmt] ctor
+// CHECK-NEXT: [range] ctor
+// CHECK-NEXT: [range] end()
+// CHECK-NEXT: [range] begin()
+// CHECK-NEXT: [range] begin()
+// CHECK-NEXT: [range] iterator distance: 3
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: i=0 v=0
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=0 vv=0
+// CHECK-NEXT: [range] iterator advance: 0 += 1
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: i=0 v=1
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=0 vv=1
+// CHECK-NEXT: [range] iterator advance: 0 += 2
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: i=0 v=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=0 vv=2
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: i=1 v=0
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=1 vv=0
+// CHECK-NEXT: [range] iterator advance: 0 += 1
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: i=1 v=1
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=1 vv=1
+// CHECK-NEXT: [range] iterator advance: 0 += 2
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: i=1 v=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=1 vv=2
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: i=2 v=0
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=2 vv=0
+// CHECK-NEXT: [range] iterator advance: 0 += 1
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: i=2 v=1
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=2 vv=1
+// CHECK-NEXT: [range] iterator advance: 0 += 2
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: i=2 v=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=2 vv=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] dtor
+// CHECK-NEXT: [init-stmt] dtor
+// CHECK-NEXT: done
+
diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c
new file mode 100644
index 0000000000000..272908e72c429
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c
@@ -0,0 +1,45 @@
+// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdio>
+
+int main() { // Runtime test: fuse nested inside a collapse(2) worksharing loop.
+ printf("do\n");
+#pragma omp parallel for collapse(2) num_threads(1)
+ for (int i = 0; i < 3; ++i) // Outer loop; collapse(2) pairs it with the loop that follows.
+#pragma omp fuse
+ { // Loop sequence the fuse directive applies to.
+ for (int j = 0; j < 3; ++j) // 3 iterations
+ printf("i=%d j=%d\n", i, j);
+ for (int k = 0; k < 3; ++k) // 3 iterations, same trip count as the j loop
+ printf("i=%d k=%d\n", i, k);
+ } // Expected output alternates j and k lines for each value of i.
+ printf("done\n");
+ return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK: i=0 j=0
+// CHECK-NEXT: i=0 k=0
+// CHECK-NEXT: i=0 j=1
+// CHECK-NEXT: i=0 k=1
+// CHECK-NEXT: i=0 j=2
+// CHECK-NEXT: i=0 k=2
+// CHECK-NEXT: i=1 j=0
+// CHECK-NEXT: i=1 k=0
+// CHECK-NEXT: i=1 j=1
+// CHECK-NEXT: i=1 k=1
+// CHECK-NEXT: i=1 j=2
+// CHECK-NEXT: i=1 k=2
+// CHECK-NEXT: i=2 j=0
+// CHECK-NEXT: i=2 k=0
+// CHECK-NEXT: i=2 j=1
+// CHECK-NEXT: i=2 k=1
+// CHECK-NEXT: i=2 j=2
+// CHECK-NEXT: i=2 k=2
+// CHECK-NEXT: done
>From 7e3bd1e3afcdc246da0362ffb8693b160f9d3f4a Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:28:04 +0000
Subject: [PATCH 2/9] Add looprange clause
---
clang/include/clang/AST/OpenMPClause.h | 100 ++++++
clang/include/clang/AST/RecursiveASTVisitor.h | 8 +
clang/include/clang/AST/StmtOpenMP.h | 18 +-
.../clang/Basic/DiagnosticSemaKinds.td | 5 +
clang/include/clang/Parse/Parser.h | 3 +
clang/include/clang/Sema/SemaOpenMP.h | 6 +
clang/lib/AST/OpenMPClause.cpp | 35 ++
clang/lib/AST/StmtOpenMP.cpp | 7 +-
clang/lib/AST/StmtProfile.cpp | 7 +
clang/lib/Basic/OpenMPKinds.cpp | 2 +
clang/lib/Parse/ParseOpenMP.cpp | 36 ++
clang/lib/Sema/SemaOpenMP.cpp | 155 +++++++--
clang/lib/Sema/TreeTransform.h | 33 ++
clang/lib/Serialization/ASTReader.cpp | 11 +
clang/lib/Serialization/ASTReaderStmt.cpp | 4 +-
clang/lib/Serialization/ASTWriter.cpp | 8 +
clang/test/OpenMP/fuse_ast_print.cpp | 67 ++++
clang/test/OpenMP/fuse_codegen.cpp | 320 +++++++++++++++++-
clang/test/OpenMP/fuse_messages.cpp | 112 +++++-
clang/tools/libclang/CIndex.cpp | 5 +
llvm/include/llvm/Frontend/OpenMP/ClauseT.h | 16 +-
llvm/include/llvm/Frontend/OpenMP/OMP.td | 6 +
22 files changed, 921 insertions(+), 43 deletions(-)
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index 6fd16bc0f03be..8f937cdef9cd0 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -1143,6 +1143,106 @@ class OMPFullClause final : public OMPNoChildClause<llvm::omp::OMPC_full> {
static OMPFullClause *CreateEmpty(const ASTContext &C);
};
+/// Represents the 'looprange' clause of the '#pragma omp fuse' directive.
+///
+/// \code {c}
+/// #pragma omp fuse looprange(1,2)
+/// {
+///   for (int i = 0; i < 64; ++i) {}
+///   for (int j = 0; j < 256; j += 2) {}
+///   for (int k = 127; k >= 0; --k) {}
+/// }
+/// \endcode
+class OMPLoopRangeClause final : public OMPClause {
+  friend class OMPClauseReader;
+
+  explicit OMPLoopRangeClause()
+      : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {}
+
+  /// Location of '('
+  SourceLocation LParenLoc;
+
+  /// Location of 'first'
+  SourceLocation FirstLoc;
+
+  /// Location of 'count'
+  SourceLocation CountLoc;
+
+  /// Slots of the two clause arguments inside \c Args.
+  enum { FirstExpr, CountExpr, NumArgs };
+
+  /// 'first' and 'count' expressions; contiguous so children() is one range.
+  Stmt *Args[NumArgs] = {nullptr, nullptr};
+
+  /// Set 'first'
+  void setFirst(Expr *First) { Args[FirstExpr] = First; }
+
+  /// Set 'count'
+  void setCount(Expr *Count) { Args[CountExpr] = Count; }
+
+  /// Set location of '('.
+  void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
+  /// Set location of 'first' argument
+  void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; }
+
+  /// Set location of 'count' argument
+  void setCountLoc(SourceLocation Loc) { CountLoc = Loc; }
+
+public:
+  /// Build an AST node for a 'looprange' clause
+  ///
+  /// \param C Context of the AST.
+  /// \param StartLoc Starting location of the clause.
+  /// \param LParenLoc Location of '('.
+  /// \param FirstLoc,CountLoc Locations of the 'first' and 'count' arguments.
+  /// \param EndLoc Ending location of the clause.
+  /// \param First,Count Expressions of the 'first' and 'count' arguments.
+  static OMPLoopRangeClause *
+  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
+         SourceLocation FirstLoc, SourceLocation CountLoc,
+         SourceLocation EndLoc, Expr *First, Expr *Count);
+
+  /// Build an empty 'looprange' node for deserialization
+  ///
+  /// \param C Context of the AST.
+  static OMPLoopRangeClause *CreateEmpty(const ASTContext &C);
+
+  /// Returns the location of '('
+  SourceLocation getLParenLoc() const { return LParenLoc; }
+
+  /// Returns the location of 'first'
+  SourceLocation getFirstLoc() const { return FirstLoc; }
+
+  /// Returns the location of 'count'
+  SourceLocation getCountLoc() const { return CountLoc; }
+
+  /// Returns the argument 'first' or nullptr if not set
+  Expr *getFirst() const { return cast_or_null<Expr>(Args[FirstExpr]); }
+
+  /// Returns the argument 'count' or nullptr if not set
+  Expr *getCount() const { return cast_or_null<Expr>(Args[CountExpr]); }
+
+  /// Both arguments, 'first' followed by 'count'.
+  child_range children() { return child_range(Args, Args + NumArgs); }
+
+  const_child_range children() const {
+    auto Children = const_cast<OMPLoopRangeClause *>(this)->children();
+    return const_child_range(Children.begin(), Children.end());
+  }
+
+  child_range used_children() {
+    return child_range(child_iterator(), child_iterator());
+  }
+  const_child_range used_children() const {
+    return const_child_range(const_child_iterator(), const_child_iterator());
+  }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == llvm::omp::OMPC_looprange;
+  }
+};
+
/// Representation of the 'partial' clause of the '#pragma omp unroll'
/// directive.
///
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 057e9e346ce4e..94066edc64933 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3400,6 +3400,14 @@ bool RecursiveASTVisitor<Derived>::VisitOMPFullClause(OMPFullClause *C) {
return true;
}
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPLoopRangeClause(
+ OMPLoopRangeClause *C) {
+ TRY_TO(TraverseStmt(C->getFirst())); // Visit the 'first' argument expression.
+ TRY_TO(TraverseStmt(C->getCount())); // Then the 'count' argument expression.
+ return true; // NOTE(review): presumably TRY_TO returns early on failure; the macro is defined outside this hunk.
+}
+
template <typename Derived>
bool RecursiveASTVisitor<Derived>::VisitOMPPartialClause(OMPPartialClause *C) {
TRY_TO(TraverseStmt(C->getFactor()));
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index dc6f797e24ab8..85bde292ca748 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -5572,7 +5572,9 @@ class OMPTileDirective final : public OMPLoopTransformationDirective {
: OMPLoopTransformationDirective(OMPTileDirectiveClass,
llvm::omp::OMPD_tile, StartLoc, EndLoc,
NumLoops) {
+ // Tiling doubles the original number of loops
setNumGeneratedLoops(2 * NumLoops);
+ // Produces a single top-level canonical loop nest
setNumGeneratedLoopNests(1);
}
@@ -5803,9 +5805,9 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective {
: OMPLoopTransformationDirective(OMPReverseDirectiveClass,
llvm::omp::OMPD_reverse, StartLoc,
EndLoc, 1) {
-
- setNumGeneratedLoopNests(1);
+ // Reverse produces a single top-level canonical loop nest
setNumGeneratedLoops(1);
+ setNumGeneratedLoopNests(1);
}
void setPreInits(Stmt *PreInits) {
@@ -5873,6 +5875,8 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
: OMPLoopTransformationDirective(OMPInterchangeDirectiveClass,
llvm::omp::OMPD_interchange, StartLoc,
EndLoc, NumLoops) {
+ // Interchange produces a single top-level canonical loop
+ // nest, with the exact same amount of total loops
setNumGeneratedLoops(NumLoops);
setNumGeneratedLoopNests(1);
}
@@ -5950,11 +5954,7 @@ class OMPFuseDirective final : public OMPLoopTransformationDirective {
unsigned NumLoops)
: OMPLoopTransformationDirective(OMPFuseDirectiveClass,
llvm::omp::OMPD_fuse, StartLoc, EndLoc,
- NumLoops) {
- setNumGeneratedLoops(1);
- // TODO: After implementing the looprange clause, change this logic
- setNumGeneratedLoopNests(1);
- }
+ NumLoops) {}
void setPreInits(Stmt *PreInits) {
Data->getChildren()[PreInitsOffset] = PreInits;
@@ -5990,8 +5990,10 @@ class OMPFuseDirective final : public OMPLoopTransformationDirective {
/// \param C Context of the AST
/// \param NumClauses Number of clauses to allocate
/// \param NumLoops Number of associated loops to allocate
+ /// \param NumLoopNests Number of top level loops to allocate
static OMPFuseDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses,
- unsigned NumLoops);
+ unsigned NumLoops,
+ unsigned NumLoopNests);
/// Gets the associated loops after the transformation. This is the de-sugared
/// replacement or nulltpr in dependent contexts.
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f31b6f8a3b26a..191618e7865dc 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11566,6 +11566,11 @@ def err_omp_not_a_loop_sequence : Error <
"statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">;
def err_omp_empty_loop_sequence : Error <
"loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">;
+def err_omp_invalid_looprange : Error <
+ "loop range in '#pragma omp %0' exceeds the number of available loops: "
+ "range end '%1' is greater than the total number of loops '%2'">;
+def warn_omp_redundant_fusion : Warning <
+ "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">;
def err_omp_not_for : Error<
"%select{statement after '#pragma omp %1' must be a for loop|"
"expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">;
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index e6492b81dfff8..965dcb7da26d8 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -6739,6 +6739,9 @@ class Parser : public CodeCompletionHandler {
OpenMPClauseKind Kind,
bool ParseOnly);
+ /// Parses the 'looprange' clause of a '#pragma omp fuse' directive.
+ OMPClause *ParseOpenMPLoopRangeClause();
+
/// Parses the 'sizes' clause of a '#pragma omp tile' directive.
OMPClause *ParseOpenMPSizesClause();
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index 8d78c2197c89d..f4a075e54cebe 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -921,6 +921,12 @@ class SemaOpenMP : public SemaBase {
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc);
+
+ /// Called on well-formed 'looprange' clause after parsing its arguments.
+ OMPClause *
+ ActOnOpenMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation FirstLoc,
+ SourceLocation CountLoc, SourceLocation EndLoc);
/// Called on well-formed 'ordered' clause.
OMPClause *
ActOnOpenMPOrderedClause(SourceLocation StartLoc, SourceLocation EndLoc,
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 0e5052b944162..0b5808eb100e4 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -1024,6 +1024,26 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) {
return new (C) OMPPartialClause();
}
+// Location parameters are taken in the order declared in OpenMPClause.h.
+OMPLoopRangeClause *OMPLoopRangeClause::Create(
+    const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
+    SourceLocation FirstLoc, SourceLocation CountLoc, SourceLocation EndLoc,
+    Expr *First, Expr *Count) {
+  OMPLoopRangeClause *Clause = CreateEmpty(C);
+  Clause->setLocStart(StartLoc);
+  Clause->setLParenLoc(LParenLoc);
+  Clause->setFirstLoc(FirstLoc);
+  Clause->setCountLoc(CountLoc);
+  Clause->setLocEnd(EndLoc);
+  Clause->setFirst(First);
+  Clause->setCount(Count);
+  return Clause;
+}
+
+OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) {
+ return new (C) OMPLoopRangeClause(); // Placement-new into C; arguments and locations keep their defaults.
+}
+
OMPAllocateClause *OMPAllocateClause::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
Expr *Allocator, Expr *Alignment, SourceLocation ColonLoc,
@@ -1888,6 +1908,21 @@ void OMPClausePrinter::VisitOMPPartialClause(OMPPartialClause *Node) {
}
}
+void OMPClausePrinter::VisitOMPLoopRangeClause(OMPLoopRangeClause *Node) {
+  // The clause keyword is always printed; the parenthesized argument list
+  // only when both arguments are available.
+  OS << "looprange";
+  Expr *const FirstArg = Node->getFirst();
+  Expr *const CountArg = Node->getCount();
+  if (!FirstArg || !CountArg)
+    return;
+  OS << "(";
+  FirstArg->printPretty(OS, nullptr, Policy, 0);
+  OS << ",";
+  CountArg->printPretty(OS, nullptr, Policy, 0);
+  OS << ")";
+}
+
void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) {
OS << "allocator(";
Node->getAllocator()->printPretty(OS, nullptr, Policy, 0);
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 4a6133766ef1c..06c987e7f1761 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -524,10 +524,13 @@ OMPFuseDirective *OMPFuseDirective::Create(
OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
- unsigned NumLoops) {
- return createEmptyDirective<OMPFuseDirective>(
+ unsigned NumLoops,
+ unsigned NumLoopNests) {
+ OMPFuseDirective *Dir = createEmptyDirective<OMPFuseDirective>(
C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1,
SourceLocation(), SourceLocation(), NumLoops);
+ Dir->setNumGeneratedLoopNests(NumLoopNests);
+ return Dir;
}
OMPForSimdDirective *
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 99d426db985e8..9f0ce076c35fa 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -511,6 +511,13 @@ void OMPClauseProfiler::VisitOMPPartialClause(const OMPPartialClause *C) {
Profiler->VisitExpr(Factor);
}
+void OMPClauseProfiler::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) {
+  // Profile whichever arguments are present, 'first' before 'count'.
+  for (const Expr *Arg : {C->getFirst(), C->getCount()})
+    if (Arg)
+      Profiler->VisitExpr(Arg);
+}
+
void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) {
if (C->getAllocator())
Profiler->VisitStmt(C->getAllocator());
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index d172450512f13..18330181f1509 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -248,6 +248,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str,
case OMPC_affinity:
case OMPC_when:
case OMPC_append_args:
+ case OMPC_looprange:
break;
default:
break;
@@ -583,6 +584,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind,
case OMPC_affinity:
case OMPC_when:
case OMPC_append_args:
+ case OMPC_looprange:
break;
default:
break;
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index cfffcdb01a514..ade5192d1968d 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -3041,6 +3041,39 @@ OMPClause *Parser::ParseOpenMPSizesClause() {
OpenLoc, CloseLoc);
}
+OMPClause *Parser::ParseOpenMPLoopRangeClause() { // Parses "looprange ( first , count )"; nullptr if '(' or an argument is missing.
+ SourceLocation ClauseNameLoc = ConsumeToken(); // Consume the clause name; its location becomes the clause start.
+ SourceLocation FirstLoc, CountLoc;
+
+ BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
+ if (T.consumeOpen()) {
+ Diag(Tok, diag::err_expected) << tok::l_paren;
+ return nullptr;
+ }
+
+ FirstLoc = Tok.getLocation(); // Location of the token that starts the 'first' argument.
+ ExprResult FirstVal = ParseConstantExpression();
+ if (!FirstVal.isUsable()) {
+ T.skipToEnd();
+ return nullptr;
+ }
+
+ ExpectAndConsume(tok::comma); // NOTE(review): result is discarded, so parsing continues without a ',' — confirm this recovery is intended.
+
+ CountLoc = Tok.getLocation(); // Location of the token that starts the 'count' argument.
+ ExprResult CountVal = ParseConstantExpression();
+ if (!CountVal.isUsable()) {
+ T.skipToEnd();
+ return nullptr;
+ }
+
+ T.consumeClose(); // NOTE(review): result is discarded here as well.
+
+ return Actions.OpenMP().ActOnOpenMPLoopRangeClause(
+ FirstVal.get(), CountVal.get(), ClauseNameLoc, T.getOpenLocation(),
+ FirstLoc, CountLoc, T.getCloseLocation());
+}
+
OMPClause *Parser::ParseOpenMPPermutationClause() {
SourceLocation ClauseNameLoc, OpenLoc, CloseLoc;
SmallVector<Expr *> ArgExprs;
@@ -3469,6 +3502,9 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
}
Clause = ParseOpenMPClause(CKind, WrongDirective);
break;
+ case OMPC_looprange:
+ Clause = ParseOpenMPLoopRangeClause();
+ break;
default:
break;
}
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index bd8bee64a9d2f..556b5cb43b6f8 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -14289,7 +14289,6 @@ bool SemaOpenMP::checkTransformableLoopSequence(
// and tries to match the input AST to the canonical loop sequence grammar
// structure
- auto NLCV = NestedLoopCounterVisitor();
// Helper functions to validate canonical loop sequence grammar is valid
auto isLoopSequenceDerivation = [](auto *Child) {
return isa<ForStmt>(Child) || isa<CXXForRangeStmt>(Child) ||
@@ -14392,7 +14391,7 @@ bool SemaOpenMP::checkTransformableLoopSequence(
// Modularized code for handling regular canonical loops
auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits,
- &LoopSeqSize, &NumLoops, Kind, &TmpDSA, &NLCV,
+ &LoopSeqSize, &NumLoops, Kind, &TmpDSA,
this](Stmt *Child) {
OriginalInits.emplace_back();
LoopHelpers.emplace_back();
@@ -14405,8 +14404,11 @@ bool SemaOpenMP::checkTransformableLoopSequence(
<< getOpenMPDirectiveName(Kind);
return false;
}
+
storeLoopStatements(Child);
- NumLoops += NLCV.TraverseStmt(Child);
+ auto NLCV = NestedLoopCounterVisitor();
+ NLCV.TraverseStmt(Child);
+ NumLoops += NLCV.getNestedLoopCount();
return true;
};
@@ -15732,6 +15734,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
Stmt *AStmt,
SourceLocation StartLoc,
SourceLocation EndLoc) {
+
ASTContext &Context = getASTContext();
DeclContext *CurrContext = SemaRef.CurContext;
Scope *CurScope = SemaRef.getCurScope();
@@ -15748,7 +15751,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
SmallVector<SmallVector<Stmt *, 0>> OriginalInits;
unsigned NumLoops;
- // TODO: Support looprange clause using LoopSeqSize
unsigned LoopSeqSize;
if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops,
LoopHelpers, LoopStmts, OriginalInits,
@@ -15757,10 +15759,67 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
}
// Defer transformation in dependent contexts
+ // The NumLoopNests argument is set to a placeholder (0)
+ // because a dependent context could prevent determining its true value
if (CurrContext->isDependentContext()) {
return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
- NumLoops, 1, AStmt, nullptr, nullptr);
+ NumLoops, 0, AStmt, nullptr, nullptr);
}
+
+ // Handle clauses, which can be any of the following: [looprange, apply]
+ const OMPLoopRangeClause *LRC =
+ OMPExecutableDirective::getSingleClause<OMPLoopRangeClause>(Clauses);
+
+ // The clause arguments are invalidated if any error arises
+ // such as non-constant or non-positive arguments
+ if (LRC && (!LRC->getFirst() || !LRC->getCount()))
+ return StmtError();
+
+ // Delayed semantic check of LoopRange constraint
+ // Evaluates the loop range arguments and returns the first and count values
+ auto EvaluateLoopRangeArguments = [&Context](Expr *First, Expr *Count,
+ uint64_t &FirstVal,
+ uint64_t &CountVal) {
+ llvm::APSInt FirstInt = First->EvaluateKnownConstInt(Context);
+ llvm::APSInt CountInt = Count->EvaluateKnownConstInt(Context);
+ FirstVal = FirstInt.getZExtValue();
+ CountVal = CountInt.getZExtValue();
+ };
+
+ // Checks if the loop range is valid
+ auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal,
+ unsigned NumLoops) -> bool {
+ return FirstVal + CountVal - 1 <= NumLoops;
+ };
+ uint64_t FirstVal = 1, CountVal = 0, LastVal = LoopSeqSize;
+
+ if (LRC) {
+ EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal,
+ CountVal);
+ if (CountVal == 1)
+ SemaRef.Diag(LRC->getCountLoc(), diag::warn_omp_redundant_fusion)
+ << getOpenMPDirectiveName(OMPD_fuse);
+
+ if (!ValidLoopRange(FirstVal, CountVal, LoopSeqSize)) {
+ SemaRef.Diag(LRC->getFirstLoc(), diag::err_omp_invalid_looprange)
+ << getOpenMPDirectiveName(OMPD_fuse) << (FirstVal + CountVal - 1)
+ << LoopSeqSize;
+ return StmtError();
+ }
+
+ LastVal = FirstVal + CountVal - 1;
+ }
+
+ // Complete fusion generates a single canonical loop nest
+ // However looprange clause generates several loop nests
+ unsigned NumLoopNests = LRC ? LoopSeqSize - CountVal + 1 : 1;
+
+ // Emit a warning for redundant loop fusion when the sequence contains only
+ // one loop.
+ if (LoopSeqSize == 1)
+ SemaRef.Diag(AStmt->getBeginLoc(), diag::warn_omp_redundant_fusion)
+ << getOpenMPDirectiveName(OMPD_fuse);
+
assert(LoopHelpers.size() == LoopSeqSize &&
"Expecting loop iteration space dimensionality to match number of "
"affected loops");
@@ -15774,8 +15833,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
SmallVector<Stmt *> PreInits;
// Select the type with the largest bit width among all induction variables
- QualType IVType = LoopHelpers[0].IterationVarRef->getType();
- for (unsigned int I = 1; I < LoopSeqSize; ++I) {
+ QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType();
+ for (unsigned int I = FirstVal; I < LastVal; ++I) {
QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType();
if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) {
IVType = CurrentIVType;
@@ -15824,20 +15883,21 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
// Process each single loop to generate and collect declarations
// and statements for all helper expressions
- for (unsigned int I = 0; I < LoopSeqSize; ++I) {
+ for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
PreInits);
- auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", I);
- auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", I);
- auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", I);
+ auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", J);
+ auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", J);
+ auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", J);
auto [NIVD, NIDStmt] =
- CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", I, true);
+ CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", J, true);
auto [IVVD, IVDStmt] =
- CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", I);
+ CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", J);
if (!LBVD || !STVD || !NIVD || !IVVD)
- return StmtError();
+ assert(LBVD && STVD && NIVD && IVVD &&
+ "OpenMP Fuse Helper variables creation failed");
UBVarDecls.push_back(UBVD);
LBVarDecls.push_back(LBVD);
@@ -15912,8 +15972,9 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
// omp.fuse.max = max(omp.temp1, omp.temp0)
ExprResult MaxExpr;
- for (unsigned I = 0; I < LoopSeqSize; ++I) {
- DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[I]);
+ // I is the position in the original loop sequence; J indexes NIVarDecls.
+ for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
+ DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[J]);
QualType NITy = NIRef->getType();
if (MaxExpr.isUnset()) {
@@ -15921,7 +15982,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
MaxExpr = NIRef;
} else {
// Create a new acummulator variable t_i = MaxExpr
- std::string TempName = (Twine(".omp.temp.") + Twine(I)).str();
+ std::string TempName = (Twine(".omp.temp.") + Twine(J)).str();
VarDecl *TempDecl =
buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr);
TempDecl->setInit(MaxExpr.get());
@@ -15944,7 +16005,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
if (!Comparison.isUsable())
return StmtError();
- DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[I]);
+ DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[J]);
// Update MaxExpr using a conditional expression to hold the max value
MaxExpr = new (Context) ConditionalOperator(
Comparison.get(), SourceLocation(), TempRef2, SourceLocation(),
@@ -15997,23 +16058,21 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
CompoundStmt *FusedBody = nullptr;
SmallVector<Stmt *, 4> FusedBodyStmts;
- for (unsigned I = 0; I < LoopSeqSize; ++I) {
-
+ for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
// Assingment of the original sub-loop index to compute the logical index
// IV_k = LB_k + omp.fuse.index * ST_k
-
ExprResult IdxExpr =
SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul,
- MakeVarDeclRef(STVarDecls[I]), MakeIVRef());
+ MakeVarDeclRef(STVarDecls[J]), MakeIVRef());
if (!IdxExpr.isUsable())
return StmtError();
IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add,
- MakeVarDeclRef(LBVarDecls[I]), IdxExpr.get());
+ MakeVarDeclRef(LBVarDecls[J]), IdxExpr.get());
if (!IdxExpr.isUsable())
return StmtError();
IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign,
- MakeVarDeclRef(IVVarDecls[I]), IdxExpr.get());
+ MakeVarDeclRef(IVVarDecls[J]), IdxExpr.get());
if (!IdxExpr.isUsable())
return StmtError();
@@ -16028,7 +16087,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
Stmt *Body = (isa<ForStmt>(LoopStmts[I]))
? cast<ForStmt>(LoopStmts[I])->getBody()
: cast<CXXForRangeStmt>(LoopStmts[I])->getBody();
-
BodyStmts.push_back(Body);
CompoundStmt *CombinedBody =
@@ -16036,7 +16094,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
SourceLocation(), SourceLocation());
ExprResult Condition =
SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(),
- MakeVarDeclRef(NIVarDecls[I]));
+ MakeVarDeclRef(NIVarDecls[J]));
if (!Condition.isUsable())
return StmtError();
@@ -16057,8 +16115,26 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(),
IncrExpr.get()->getEndLoc());
+ // In the case of looprange, the result of fuse won't simply
+ // be a single loop (ForStmt), but rather a loop sequence
+ // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop
+ // and the post-fusion loops, preserving their original order.
+ Stmt *FusionStmt = FusedForStmt;
+ if (LRC) {
+ SmallVector<Stmt *, 4> FinalLoops;
+ // Gather all the pre-fusion loops
+ for (unsigned I = 0; I < FirstVal - 1; ++I)
+ FinalLoops.push_back(LoopStmts[I]);
+ // Gather the fused loop
+ FinalLoops.push_back(FusedForStmt);
+ // Gather all the post-fusion loops
+ for (unsigned I = FirstVal + CountVal - 1; I < LoopSeqSize; ++I)
+ FinalLoops.push_back(LoopStmts[I]);
+ FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(),
+ SourceLocation(), SourceLocation());
+ }
return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops,
- 1, AStmt, FusedForStmt,
+ NumLoopNests, AStmt, FusionStmt,
buildPreInits(Context, PreInits));
}
@@ -17181,6 +17257,31 @@ OMPClause *SemaOpenMP::ActOnOpenMPPartialClause(Expr *FactorExpr,
FactorExpr);
}
+OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause(
+ Expr *First, Expr *Count, SourceLocation StartLoc, SourceLocation LParenLoc,
+ SourceLocation FirstLoc, SourceLocation CountLoc, SourceLocation EndLoc) {
+
+ // OpenMP [6.0, Restrictions]
+ // First and Count must be constant integer expressions with positive values
+ ExprResult FirstVal =
+ VerifyPositiveIntegerConstantInClause(First, OMPC_looprange);
+ if (FirstVal.isInvalid())
+ First = nullptr;
+
+ ExprResult CountVal =
+ VerifyPositiveIntegerConstantInClause(Count, OMPC_looprange);
+ if (CountVal.isInvalid())
+ Count = nullptr;
+
+ // OpenMP [6.0, Restrictions]
+ // first + count - 1 must not evaluate to a value greater than the
+ // loop sequence length of the associated canonical loop sequence.
+ // This check must be performed afterwards due to the delayed
+ // parsing and computation of the associated loop sequence
+ return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc,
+ FirstLoc, CountLoc, EndLoc, First, Count);
+}
+
OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 034b0c8243667..d70e2a3874c07 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1775,6 +1775,14 @@ class TreeTransform {
LParenLoc, EndLoc);
}
+ OMPClause *
+ RebuildOMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation FirstLoc,
+ SourceLocation CountLoc, SourceLocation EndLoc) {
+ return getSema().OpenMP().ActOnOpenMPLoopRangeClause(
+ First, Count, StartLoc, LParenLoc, FirstLoc, CountLoc, EndLoc);
+ }
+
/// Build a new OpenMP 'allocator' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
@@ -10569,6 +10577,31 @@ TreeTransform<Derived>::TransformOMPPartialClause(OMPPartialClause *C) {
C->getEndLoc());
}
+template <typename Derived>
+OMPClause *
+TreeTransform<Derived>::TransformOMPLoopRangeClause(OMPLoopRangeClause *C) {
+ ExprResult F = getDerived().TransformExpr(C->getFirst());
+ if (F.isInvalid())
+ return nullptr;
+
+ ExprResult Cn = getDerived().TransformExpr(C->getCount());
+ if (Cn.isInvalid())
+ return nullptr;
+
+ Expr *First = F.get();
+ Expr *Count = Cn.get();
+
+ bool Changed = (First != C->getFirst()) || (Count != C->getCount());
+
+ // If no changes and AlwaysRebuild() is false, return the original clause
+ if (!Changed && !getDerived().AlwaysRebuild())
+ return C;
+
+ return RebuildOMPLoopRangeClause(First, Count, C->getBeginLoc(),
+ C->getLParenLoc(), C->getFirstLoc(),
+ C->getCountLoc(), C->getEndLoc());
+}
+
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPCollapseClause(OMPCollapseClause *C) {
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index d068f5e163176..8591eb9394fa5 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -11088,6 +11088,9 @@ OMPClause *OMPClauseReader::readClause() {
case llvm::omp::OMPC_partial:
C = OMPPartialClause::CreateEmpty(Context);
break;
+ case llvm::omp::OMPC_looprange:
+ C = OMPLoopRangeClause::CreateEmpty(Context);
+ break;
case llvm::omp::OMPC_allocator:
C = new (Context) OMPAllocatorClause();
break;
@@ -11489,6 +11492,14 @@ void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) {
C->setLParenLoc(Record.readSourceLocation());
}
+void OMPClauseReader::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) {
+ C->setFirst(Record.readSubExpr());
+ C->setCount(Record.readSubExpr());
+ C->setLParenLoc(Record.readSourceLocation());
+ C->setFirstLoc(Record.readSourceLocation());
+ C->setCountLoc(Record.readSourceLocation());
+}
+
void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
C->setAllocator(Record.readExpr());
C->setLParenLoc(Record.readSourceLocation());
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 6762d11d6b73e..a301e1c0b0e32 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -3621,7 +3621,9 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
case STMT_OMP_FUSE_DIRECTIVE: {
unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];
- S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops);
+ unsigned NumLoopNests = Record[ASTStmtReader::NumStmtFields + 2];
+ S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops,
+ NumLoopNests);
break;
}
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 1b3d3c22aa9f5..8548f7e50d34b 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -7782,6 +7782,14 @@ void OMPClauseWriter::VisitOMPPartialClause(OMPPartialClause *C) {
Record.AddSourceLocation(C->getLParenLoc());
}
+void OMPClauseWriter::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) {
+ Record.AddStmt(C->getFirst());
+ Record.AddStmt(C->getCount());
+ Record.AddSourceLocation(C->getLParenLoc());
+ Record.AddSourceLocation(C->getFirstLoc());
+ Record.AddSourceLocation(C->getCountLoc());
+}
+
void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
Record.AddStmt(C->getAllocator());
Record.AddSourceLocation(C->getLParenLoc());
diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp
index 43ce815dab024..ac4f0d38a9c68 100644
--- a/clang/test/OpenMP/fuse_ast_print.cpp
+++ b/clang/test/OpenMP/fuse_ast_print.cpp
@@ -271,6 +271,73 @@ void foo7() {
}
+// PRINT-LABEL: void foo8(
+// DUMP-LABEL: FunctionDecl {{.*}} foo8
+void foo8() {
+ // PRINT: #pragma omp fuse looprange(2,2)
+ // DUMP: OMPFuseDirective
+ // DUMP: OMPLooprangeClause
+ #pragma omp fuse looprange(2,2)
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+
+ }
+
+}
+
+//PRINT-LABEL: void foo9(
+//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo9
+//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} F
+//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} C
+template<int F, int C>
+void foo9() {
+ // PRINT: #pragma omp fuse looprange(F,C)
+ // DUMP: OMPFuseDirective
+ // DUMP: OMPLooprangeClause
+ #pragma omp fuse looprange(F,C)
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+
+ }
+}
+
+// Also test instantiating the template.
+void tfoo9() {
+ foo9<1, 2>();
+}
+
diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp
index 6c1e21092da43..d9500bed3ce31 100644
--- a/clang/test/OpenMP/fuse_codegen.cpp
+++ b/clang/test/OpenMP/fuse_codegen.cpp
@@ -53,6 +53,18 @@ extern "C" void foo3() {
}
}
+extern "C" void foo4() {
+ double arr[256];
+
+ #pragma omp fuse looprange(2,2)
+ {
+ for(int i = 0; i < 128; ++i) body(i);
+ for(int j = 0; j < 256; j+=2) body(j);
+ for(int k = 0; k < 64; ++k) body(k);
+ for(int c = 42; auto &&v: arr) body(c,v);
+ }
+}
+
#endif
// CHECK1-LABEL: define dso_local void @body(
@@ -777,6 +789,157 @@ extern "C" void foo3() {
// CHECK1-NEXT: ret void
//
//
+// CHECK1-LABEL: define dso_local void @foo4(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK1-NEXT: store i32 63, ptr [[DOTOMP_UB1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128
+// CHECK1-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP6]])
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND2:.*]]
+// CHECK1: [[FOR_COND2]]:
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]]
+// CHECK1: [[FOR_BODY4]]:
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// CHECK1-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2
+// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP16]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+// CHECK1-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]]
+// CHECK1: [[IF_THEN9]]:
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]]
+// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1
+// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK1-NEXT: store i32 [[ADD13]], ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP23]])
+// CHECK1-NEXT: br label %[[IF_END14]]
+// CHECK1: [[IF_END14]]:
+// CHECK1-NEXT: br label %[[FOR_INC15:.*]]
+// CHECK1: [[FOR_INC15]]:
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1
+// CHECK1-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK1: [[FOR_END17]]:
+// CHECK1-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND19:.*]]
+// CHECK1: [[FOR_COND19]]:
+// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]]
+// CHECK1: [[FOR_BODY21]]:
+// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: store ptr [[TMP29]], ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP30]], double noundef [[TMP32]])
+// CHECK1-NEXT: br label %[[FOR_INC22:.*]]
+// CHECK1: [[FOR_INC22]]:
+// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1
+// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND19]]
+// CHECK1: [[FOR_END23]]:
+// CHECK1-NEXT: ret void
+//
+//
// CHECK2-LABEL: define dso_local void @body(
// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: [[ENTRY:.*:]]
@@ -1259,6 +1422,157 @@ extern "C" void foo3() {
// CHECK2-NEXT: ret void
//
//
+// CHECK2-LABEL: define dso_local void @foo4(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK2-NEXT: store i32 63, ptr [[DOTOMP_UB1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128
+// CHECK2-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP6]])
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND2:.*]]
+// CHECK2: [[FOR_COND2]]:
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
+// CHECK2-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]]
+// CHECK2: [[FOR_BODY4]]:
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// CHECK2-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]]
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2
+// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP16]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+// CHECK2-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]]
+// CHECK2: [[IF_THEN9]]:
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
+// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]]
+// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1
+// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK2-NEXT: store i32 [[ADD13]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP23]])
+// CHECK2-NEXT: br label %[[IF_END14]]
+// CHECK2: [[IF_END14]]:
+// CHECK2-NEXT: br label %[[FOR_INC15:.*]]
+// CHECK2: [[FOR_INC15]]:
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1
+// CHECK2-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK2: [[FOR_END17]]:
+// CHECK2-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND19:.*]]
+// CHECK2: [[FOR_COND19]]:
+// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]]
+// CHECK2: [[FOR_BODY21]]:
+// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: store ptr [[TMP29]], ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP30]], double noundef [[TMP32]])
+// CHECK2-NEXT: br label %[[FOR_INC22:.*]]
+// CHECK2: [[FOR_INC22]]:
+// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1
+// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND19]]
+// CHECK2: [[FOR_END23]]:
+// CHECK2-NEXT: ret void
+//
+//
// CHECK2-LABEL: define dso_local void @tfoo2(
// CHECK2-SAME: ) #[[ATTR0]] {
// CHECK2-NEXT: [[ENTRY:.*:]]
@@ -1494,7 +1808,7 @@ extern "C" void foo3() {
// CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK2: [[FOR_END]]:
// CHECK2-NEXT: ret void
//
@@ -1503,9 +1817,13 @@ extern "C" void foo3() {
// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"}
// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
+// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
//.
// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"}
// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
+// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
//.
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp
index 50dedfd2c0dc6..2a2491d008a0b 100644
--- a/clang/test/OpenMP/fuse_messages.cpp
+++ b/clang/test/OpenMP/fuse_messages.cpp
@@ -33,6 +33,8 @@ void func() {
{
for (int i = 0; i < 7; ++i)
;
+ for(int j = 0; j < 100; ++j);
+
}
@@ -41,6 +43,8 @@ void func() {
{
for (int i = 0; i < 7; ++i)
;
+ for(int j = 0; j < 100; ++j);
+
}
//expected-error at +4 {{loop after '#pragma omp fuse' is not in canonical form}}
@@ -50,6 +54,7 @@ void func() {
for(int i = 0; i < 10; i*=2) {
;
}
+ for(int j = 0; j < 100; ++j);
}
//expected-error at +2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}}
@@ -73,4 +78,109 @@ void func() {
for(unsigned int j = 0; j < 10; ++j);
for(long long k = 0; k < 100; ++k);
}
-}
\ No newline at end of file
+
+ //expected-warning at +2 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}}
+ #pragma omp fuse
+ {
+ for(int i = 0; i < 10; ++i);
+ }
+
+ //expected-warning at +1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}}
+ #pragma omp fuse looprange(1, 1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ }
+
+ //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+ #pragma omp fuse looprange(1, -1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ }
+
+ //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+ #pragma omp fuse looprange(1, 0)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ }
+
+ const int x = 1;
+ constexpr int y = 4;
+ //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than the total number of loops '3'}}
+ #pragma omp fuse looprange(x,y)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+
+ //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '420' is greater than the total number of loops '3'}}
+ #pragma omp fuse looprange(1,420)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+}
+
+// In a template context, but expression itself not instantiation-dependent
+template <typename T>
+static void templated_func() {
+
+ //expected-warning at +1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}}
+ #pragma omp fuse looprange(2,1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+
+ //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '5' is greater than the total number of loops '3'}}
+ #pragma omp fuse looprange(3,3)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+
+}
+
+template <int V>
+static void templated_func_value_dependent() {
+
+ //expected-warning at +1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}}
+ #pragma omp fuse looprange(V,1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+}
+
+template <typename T>
+static void templated_func_type_dependent() {
+ constexpr T s = 1;
+
+ //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+ #pragma omp fuse looprange(s,s-1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+}
+
+
+void template_inst() {
+ // expected-note at +1 {{in instantiation of function template specialization 'templated_func<int>' requested here}}
+ templated_func<int>();
+ // expected-note at +1 {{in instantiation of function template specialization 'templated_func_value_dependent<1>' requested here}}
+ templated_func_value_dependent<1>();
+ // expected-note at +1 {{in instantiation of function template specialization 'templated_func_type_dependent<int>' requested here}}
+ templated_func_type_dependent<int>();
+
+}
+
+
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index fd788ac3d69d4..38f5183b146ee 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2412,6 +2412,11 @@ void OMPClauseEnqueue::VisitOMPPartialClause(const OMPPartialClause *C) {
Visitor->AddStmt(C->getFactor());
}
+// Hands both operand expressions of a 'looprange' clause to the visitor via
+// AddStmt: first the expression returned by getFirst(), then the one returned
+// by getCount().
+void OMPClauseEnqueue::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) {
+ Visitor->AddStmt(C->getFirst());
+ Visitor->AddStmt(C->getCount());
+}
+
void OMPClauseEnqueue::VisitOMPAllocatorClause(const OMPAllocatorClause *C) {
Visitor->AddStmt(C->getAllocator());
}
diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
index e0714e812e5cd..dd51274c1aaf5 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
@@ -1233,6 +1233,15 @@ struct WriteT {
using EmptyTrait = std::true_type;
};
+// V6: [6.4.7] Looprange clause
+// Tuple-style clause (TupleTrait) whose payload `t` holds two expressions of
+// type E.
+// NOTE(review): the Clang-side clause in this patch exposes its operands as
+// getFirst()/getCount(); confirm that `End` is meant to carry the loop count
+// rather than the position of the last loop in the range.
+template <typename T, typename I, typename E> struct LoopRangeT {
+ using Begin = E;
+ using End = E;
+
+ using TupleTrait = std::true_type;
+ std::tuple<Begin, End> t;
+};
+
// ---
template <typename T, typename I, typename E>
@@ -1263,9 +1272,10 @@ using TupleClausesT =
DefaultmapT<T, I, E>, DeviceT<T, I, E>, DistScheduleT<T, I, E>,
DoacrossT<T, I, E>, FromT<T, I, E>, GrainsizeT<T, I, E>,
IfT<T, I, E>, InitT<T, I, E>, InReductionT<T, I, E>,
- LastprivateT<T, I, E>, LinearT<T, I, E>, MapT<T, I, E>,
- NumTasksT<T, I, E>, OrderT<T, I, E>, ReductionT<T, I, E>,
- ScheduleT<T, I, E>, TaskReductionT<T, I, E>, ToT<T, I, E>>;
+ LastprivateT<T, I, E>, LinearT<T, I, E>, LoopRangeT<T, I, E>,
+ MapT<T, I, E>, NumTasksT<T, I, E>, OrderT<T, I, E>,
+ ReductionT<T, I, E>, ScheduleT<T, I, E>,
+ TaskReductionT<T, I, E>, ToT<T, I, E>>;
template <typename T, typename I, typename E>
using UnionClausesT = std::variant<DependT<T, I, E>>;
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 8286cfcadaafd..ae19385c022d0 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -271,6 +271,9 @@ def OMPC_Linear : Clause<"linear"> {
def OMPC_Link : Clause<"link"> {
let flangClass = "OmpObjectList";
}
+def OMPC_LoopRange : Clause<"looprange"> {
+ let clangClass = "OMPLoopRangeClause";
+}
def OMPC_Map : Clause<"map"> {
let clangClass = "OMPMapClause";
let flangClass = "OmpMapClause";
@@ -853,6 +856,9 @@ def OMP_For : Directive<"for"> {
let languages = [L_C];
}
def OMP_Fuse : Directive<"fuse"> {
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_LoopRange, 60>
+ ];
let association = AS_Loop;
let category = CA_Executable;
}
>From c1e5fc3fe2ac7f126a76b44906b30029e3cc797b Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:30:39 +0000
Subject: [PATCH 3/9] Addef fuse to documentation
---
clang/docs/OpenMPSupport.rst | 2 ++
clang/docs/ReleaseNotes.rst | 1 +
2 files changed, 3 insertions(+)
diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index d6507071d4693..5f0e363792b32 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -376,6 +376,8 @@ implementation.
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| loop stripe transformation | :good:`done` | https://github.com/llvm/llvm-project/pull/119891 |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| loop fuse transformation | :good:`done` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| work distribute construct | :none:`unclaimed` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| task_iteration | :none:`unclaimed` | :none:`unclaimed` | |
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 573ae97bff710..2188e42dc705c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1016,6 +1016,7 @@ OpenMP Support
open parenthesis. (#GH139665)
- An error is now emitted when OpenMP ``collapse`` and ``ordered`` clauses have
an argument larger than what can fit within a 64-bit integer.
+- Added support for 'omp fuse' directive.
Improvements
^^^^^^^^^^^^
>From 33119f77c07cc3ecbb5b3360fd8f63a958e808c1 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:43:41 +0000
Subject: [PATCH 4/9] Refactored preinits handling and improved coverage
---
clang/docs/OpenMPSupport.rst | 2 +-
clang/include/clang/AST/StmtOpenMP.h | 5 +-
clang/include/clang/Sema/SemaOpenMP.h | 96 +-
clang/lib/AST/StmtOpenMP.cpp | 13 +
clang/lib/Basic/OpenMPKinds.cpp | 3 +-
clang/lib/CodeGen/CGExpr.cpp | 2 +
clang/lib/CodeGen/CodeGenFunction.h | 4 +
clang/lib/Sema/SemaOpenMP.cpp | 588 ++++---
clang/test/OpenMP/fuse_ast_print.cpp | 55 +
clang/test/OpenMP/fuse_codegen.cpp | 2117 +++++++++++++++----------
10 files changed, 1862 insertions(+), 1023 deletions(-)
diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index 5f0e363792b32..b39f9d3634a63 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -376,7 +376,7 @@ implementation.
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| loop stripe transformation | :good:`done` | https://github.com/llvm/llvm-project/pull/119891 |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
-| loop fuse transformation | :good:`done` | :none:`unclaimed` | |
+| loop fuse transformation | :good:`prototyped` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| work distribute construct | :none:`unclaimed` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 85bde292ca748..b6a948a8c6020 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -1005,8 +1005,7 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
Stmt::StmtClass C = T->getStmtClass();
return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass ||
C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass ||
- C == OMPStripeDirectiveClass ||
- C == OMPFuseDirectiveClass;
+ C == OMPStripeDirectiveClass || C == OMPFuseDirectiveClass;
}
};
@@ -5653,6 +5652,8 @@ class OMPStripeDirective final : public OMPLoopTransformationDirective {
llvm::omp::OMPD_stripe, StartLoc, EndLoc,
NumLoops) {
setNumGeneratedLoops(2 * NumLoops);
+ // Similar to Tile, it only generates a single top level loop nest
+ setNumGeneratedLoopNests(1);
}
void setPreInits(Stmt *PreInits) {
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index f4a075e54cebe..ac4cbe3709a0d 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -1493,16 +1493,96 @@ class SemaOpenMP : public SemaBase {
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits);
- /// Analyzes and checks a loop sequence for use by a loop transformation
+ /// @brief Categories of loops encountered during semantic OpenMP loop
+ /// analysis.
+ ///
+ /// This enumeration identifies the structural category of a loop or sequence
+ /// of loops analyzed in the context of OpenMP transformations and directives.
+ /// This categorization helps differentiate between original source loops
+ /// and the structures resulting from applying OpenMP loop transformations.
+ enum class OMPLoopCategory {
+
+ /// @var OMPLoopCategory::RegularLoop
+ /// Represents a standard canonical loop nest found in the
+ /// original source code or an intact loop after transformations
+ /// (e.g. the pre/post loops left untouched by a loopranged fusion).
+ RegularLoop,
+
+ /// @var OMPLoopCategory::TransformSingleLoop
+ /// Represents the resulting loop structure when an OpenMP loop
+ /// transformation generates a single, top-level loop.
+ TransformSingleLoop,
+
+ /// @var OMPLoopCategory::TransformLoopSequence
+ /// Represents the resulting loop structure when an OpenMP loop
+ /// transformation
+ /// generates a sequence of two or more canonical loop nests.
+ TransformLoopSequence
+ };
+
+ /// The main recursive process of `checkTransformableLoopSequence` that
+ /// performs grammatical parsing of a canonical loop sequence. It extracts
+ /// key information, such as the number of top-level loops, loop statements,
+ /// helper expressions, and other relevant loop-related data, all in a single
+ /// execution to avoid redundant traversals. This analysis flattens inner
+ /// loop sequences.
+ ///
+ /// \param LoopSeqStmt The AST of the original statement.
+ /// \param LoopSeqSize [out] Number of top level canonical loops.
+ /// \param NumLoops [out] Number of total canonical loops (nested too).
+ /// \param LoopHelpers [out] The multiple loop analyses results.
+ /// \param ForStmts [out] The multiple Stmt of each For loop.
+ /// \param OriginalInits [out] The raw original initialization statements,
+ /// one collection per loop of the loop sequence.
+ /// \param TransformsPreInits [out] The multiple collection of statements and
+ /// declarations that must have been executed/declared
+ /// before entering the loop (each belonging to a
+ /// particular loop transformation, nullptr otherwise)
+ /// \param LoopSequencePreInits [out] Additional general collection of loop
+ /// transformation related statements and declarations
+ /// not bound to a particular loop that must be
+ /// executed before entering the loop transformation
+ /// \param LoopCategories [out] A sequence of OMPLoopCategory values,
+ /// one for each loop or loop transformation node
+ /// successfully analyzed.
+ /// \param Context The ASTContext passed in by the caller.
+ /// \param Kind The loop transformation directive kind.
+ /// \return Whether the original statement is both syntactically and
+ /// semantically correct according to OpenMP 6.0 canonical loop
+ /// sequence definition.
+ bool analyzeLoopSequence(
+ Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops,
+ SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+ SmallVectorImpl<Stmt *> &ForStmts,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+ SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context,
+ OpenMPDirectiveKind Kind);
+
+ /// Validates and checks whether a loop sequence can be transformed according
+ /// to the given directive, providing necessary setup and initialization
+ /// (Driver function) before recursion using `analyzeLoopSequence`.
///
/// \param Kind The loop transformation directive kind.
- /// \param NumLoops [out] Number of total canonical loops
- /// \param LoopSeqSize [out] Number of top level canonical loops
+ /// \param AStmt The AST of the original statement
+ /// \param LoopSeqSize [out] Number of top level canonical loops.
+ /// \param NumLoops [out] Number of total canonical loops (nested too)
/// \param LoopHelpers [out] The multiple loop analyses results.
- /// \param LoopStmts [out] The multiple Stmt of each For loop.
- /// \param OriginalInits [out] The multiple collection of statements and
+ /// \param ForStmts [out] The multiple Stmt of each For loop.
+ /// \param OriginalInits [out] The raw original initialization statements
+ /// of each belonging to a loop of the loop sequence
+ /// \param TransformsPreInits [out] The multiple collection of statements and
/// declarations that must have been executed/declared
- /// before entering the loop.
+ /// before entering the loop (each belonging to a
+ /// particular loop transformation, nullptr otherwise)
+ /// \param LoopSequencePreInits [out] Additional general collection of loop
+ /// transformation related statements and declarations
+ /// not bounded to a particular loop that must be
+ /// executed before entering the loop transformation
+ /// \param LoopCategories [out] A sequence of OMPLoopCategory values,
+ /// one for each loop or loop transformation node
+ /// successfully analyzed.
/// \param Context
/// \return Whether there was an absence of errors or not
bool checkTransformableLoopSequence(
@@ -1511,7 +1591,9 @@ class SemaOpenMP : public SemaBase {
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
SmallVectorImpl<Stmt *> &ForStmts,
SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
- ASTContext &Context);
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+ SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context);
/// Helper to keep information about the current `omp begin/end declare
/// variant` nesting.
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 06c987e7f1761..e6b52792885ba 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -457,6 +457,8 @@ OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc,
C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc);
Dir->setNumGeneratedLoops(NumGeneratedLoops);
// The number of generated loops and loop nests during unroll matches
+ // given that unroll only generates top level canonical loop nests
+ // so each generated loop is a top level canonical loop nest
Dir->setNumGeneratedLoopNests(NumGeneratedLoops);
Dir->setTransformedStmt(TransformedStmt);
Dir->setPreInits(PreInits);
@@ -517,6 +519,17 @@ OMPFuseDirective *OMPFuseDirective::Create(
NumLoops);
Dir->setTransformedStmt(TransformedStmt);
Dir->setPreInits(PreInits);
+ // The number of top-level canonical loop nests may
+ // differ from the total number of generated loops.
+ // Example:
+ // Before fusion:
+ // for (int i = 0; i < N; ++i)
+ // for (int j = 0; j < M; ++j)
+ // A[i][j] = i + j;
+ //
+ // for (int k = 0; k < P; ++k)
+ // B[k] = k * 2;
+ // Here, NumLoopNests = 2, but NumLoops = 3.
Dir->setNumGeneratedLoopNests(NumLoopNests);
Dir->setNumGeneratedLoops(NumLoops);
return Dir;
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index 18330181f1509..53a9f80e6d3b7 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -704,7 +704,8 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) {
return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse ||
- DKind == OMPD_interchange || DKind == OMPD_stripe || DKind == OMPD_fuse;
+ DKind == OMPD_interchange || DKind == OMPD_stripe ||
+ DKind == OMPD_fuse;
}
bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) {
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 7cb7ee20fcf6a..1671f07bc2760 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3242,6 +3242,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
// No other cases for now.
} else {
+ llvm::dbgs() << "THE DAMN DECLREFEXPR HASN'T BEEN ENTERED IN LOCALDECLMAP\n";
+ VD->dumpColor();
llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?");
}
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index a983901f560de..ce00198c396b6 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -5414,6 +5414,10 @@ class CodeGenFunction : public CodeGenTypeCache {
/// Set the address of a local variable.
///
/// \param VD   The variable whose address is being recorded; it must not
///             already have an entry in LocalDeclMap.
/// \param Addr The address to associate with \p VD.
void setAddrOfLocalVar(const VarDecl *VD, Address Addr) {
+#ifndef NDEBUG
+    // Diagnostic aid for assertion-enabled builds only: dump the offending
+    // declaration right before the assert below fires. Without this guard the
+    // message and AST dump would also be emitted in release builds, where the
+    // assert is compiled out and execution silently continues.
+    if (LocalDeclMap.count(VD)) {
+      llvm::errs() << "Warning: VarDecl already exists in map: ";
+      VD->dumpColor();
+    }
+#endif
assert(!LocalDeclMap.count(VD) && "Decl already exists in LocalDeclMap!");
LocalDeclMap.insert({VD, Addr});
}
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 556b5cb43b6f8..b0529c9352c83 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -22,6 +22,7 @@
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/DynamicRecursiveASTVisitor.h"
#include "clang/AST/OpenMPClause.h"
+#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
@@ -47,6 +48,7 @@
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/Assumptions.h"
#include <optional>
+#include <queue>
using namespace clang;
using namespace llvm::omp;
@@ -14157,6 +14159,45 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective(
getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
}
+// Terminating overload: selected once the list of candidate types has been
+// exhausted without a successful cast.
+template <typename T, typename F>
+static bool tryHandleAs(T *t, F &&) {
+  return false;
+}
+
+/// Tries to `dyn_cast` \p t to each of the listed types in turn and invokes
+/// \p f with the first successfully cast pointer.
+///
+/// \tparam Class The type tried by this instantiation.
+/// \tparam Rest  The types still to be tried when \p Class does not match.
+/// \tparam T     The static type of \p t.
+/// \tparam F     The callable type invoked upon a successful cast.
+/// \param t The object to be checked.
+/// \param f The function to invoke if \p t matches one of the types.
+/// \return `true` if \p t matched any type and \p f was called, otherwise
+///         `false`.
+template <typename Class, typename... Rest, typename T, typename F>
+static bool tryHandleAs(T *t, F &&f) {
+  Class *Matched = dyn_cast<Class>(t);
+  // No match for this type: move on to the remaining candidates.
+  if (!Matched)
+    return tryHandleAs<Rest...>(t, std::forward<F>(f));
+
+  f(Matched);
+  return true;
+}
+
+// Appends the pre-init statements of the loop transformation directive
+// \p Transform (flattened via appendFlattenedStmtList) to the last entry of
+// \p PreInits.
+//
+// \p Transform must be one of the loop transformation directives listed in
+// the tryHandleAs call below; anything else is a programming error and hits
+// llvm_unreachable.
+static void updatePreInits(OMPLoopBasedDirective *Transform,
+                           SmallVectorImpl<SmallVector<Stmt *, 0>> &PreInits) {
+  // OMPStripeDirective has to be part of this list: the dyn_cast chain in
+  // checkTransformableLoopNest that this helper replaces handled it, so
+  // leaving it out would turn a nested 'stripe' into an unreachable.
+  if (!tryHandleAs<OMPTileDirective, OMPStripeDirective, OMPUnrollDirective,
+                   OMPReverseDirective, OMPInterchangeDirective,
+                   OMPFuseDirective>(
+          Transform, [&PreInits](auto *Dir) {
+            appendFlattenedStmtList(PreInits.back(), Dir->getPreInits());
+          }))
+    llvm_unreachable("Unhandled loop transformation");
+}
+
bool SemaOpenMP::checkTransformableLoopNest(
OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
@@ -14187,121 +14228,106 @@ bool SemaOpenMP::checkTransformableLoopNest(
return false;
},
[&OriginalInits](OMPLoopBasedDirective *Transform) {
- Stmt *DependentPreInits;
- if (auto *Dir = dyn_cast<OMPTileDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPStripeDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPReverseDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPFuseDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else
- llvm_unreachable("Unhandled loop transformation");
-
- appendFlattenedStmtList(OriginalInits.back(), DependentPreInits);
+ updatePreInits(Transform, OriginalInits);
});
assert(OriginalInits.back().empty() && "No preinit after innermost loop");
OriginalInits.pop_back();
return Result;
}
-class NestedLoopCounterVisitor
- : public clang::RecursiveASTVisitor<NestedLoopCounterVisitor> {
+// Counts the total number of nested loops, including the outermost loop (the
+// original loop). PRECONDITION: this visitor must be invoked on the original
+// loop to be analyzed. The traversal stops at Decls and Exprs, given that
+// they may contain inner loops that must not be counted.
+//
+// Example AST structure for the code:
+//
+// int main() {
+// #pragma omp fuse
+// {
+// for (int i = 0; i < 100; i++) { <-- Outer loop
+// []() {
+// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP
+// };
+// for(int j = 0; j < 5; ++j) {} <-- Inner loop
+// }
+// for (int r = 0; r < 100; r++) { <-- Outer loop
+// struct LocalClass {
+// void bar() {
+// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP
+// }
+// };
+// for(int k = 0; k < 10; ++k) {} <-- Inner loop
+// ({x = 5; for(k = 0; k < 10; ++k) x += k; x;}); <-- NOT A LOOP
+// }
+// }
+// }
+// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops
+class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor {
+private:
+ unsigned NestedLoopCount = 0;
+
public:
- explicit NestedLoopCounterVisitor() : NestedLoopCount(0) {}
+ explicit NestedLoopCounterVisitor() {}
- bool VisitForStmt(clang::ForStmt *FS) {
- ++NestedLoopCount;
- return true;
+ unsigned getNestedLoopCount() const { return NestedLoopCount; }
+
+ bool VisitForStmt(ForStmt *FS) override {
+ ++NestedLoopCount;
+ return true;
}
- bool VisitCXXForRangeStmt(clang::CXXForRangeStmt *FRS) {
- ++NestedLoopCount;
- return true;
+ bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override {
+ ++NestedLoopCount;
+ return true;
}
- unsigned getNestedLoopCount() const { return NestedLoopCount; }
+ bool TraverseStmt(Stmt *S) override {
+ if (!S)
+ return true;
-private:
- unsigned NestedLoopCount;
+ // Skip traversal of all expressions, including special cases like
+ // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions
+ // may contain inner statements (and even loops), but they are not part
+ // of the syntactic body of the surrounding loop structure.
+ // Therefore must not be counted
+ if (isa<Expr>(S))
+ return true;
+
+ // Only recurse into CompoundStmt (block {}) and loop bodies
+ if (isa<CompoundStmt>(S) || isa<ForStmt>(S) ||
+ isa<CXXForRangeStmt>(S)) {
+ return DynamicRecursiveASTVisitor::TraverseStmt(S);
+ }
+
+ // Stop traversal of the rest of statements, that break perfect
+ // loop nesting, such as control flow (IfStmt, SwitchStmt...)
+ return true;
+ }
+
+ bool TraverseDecl(Decl *D) override {
+ // Stop in the case of finding a declaration, it is not important
+ // in order to find nested loops (Possible CXXRecordDecl, RecordDecl,
+ // FunctionDecl...)
+ return true;
+ }
};
-bool SemaOpenMP::checkTransformableLoopSequence(
- OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize,
- unsigned &NumLoops,
+bool SemaOpenMP::analyzeLoopSequence(
+ Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops,
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
SmallVectorImpl<Stmt *> &ForStmts,
SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
- ASTContext &Context) {
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+ SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context,
+ OpenMPDirectiveKind Kind) {
- // Checks whether the given statement is a compound statement
VarsWithInheritedDSAType TmpDSA;
- if (!isa<CompoundStmt>(AStmt)) {
- Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
- << getOpenMPDirectiveName(Kind);
- return false;
- }
- // Callback for updating pre-inits in case there are even more
- // loop-sequence-generating-constructs inside of the main compound stmt
- auto OnTransformationCallback =
- [&OriginalInits](OMPLoopBasedDirective *Transform) {
- Stmt *DependentPreInits;
- if (auto *Dir = dyn_cast<OMPTileDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPReverseDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPFuseDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else
- llvm_unreachable("Unhandled loop transformation");
-
- appendFlattenedStmtList(OriginalInits.back(), DependentPreInits);
- };
-
- // Number of top level canonical loop nests observed (And acts as index)
- LoopSeqSize = 0;
- // Number of total observed loops
- NumLoops = 0;
-
- // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows
- // the grammar:
- //
- // canonical-loop-sequence:
- // {
- // loop-sequence+
- // }
- // where loop-sequence can be any of the following:
- // 1. canonical-loop-sequence
- // 2. loop-nest
- // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective)
- //
- // To recognise and traverse this structure the following helper functions
- // have been defined. handleLoopSequence serves as the recurisve entry point
- // and tries to match the input AST to the canonical loop sequence grammar
- // structure
-
- // Helper functions to validate canonical loop sequence grammar is valid
- auto isLoopSequenceDerivation = [](auto *Child) {
- return isa<ForStmt>(Child) || isa<CXXForRangeStmt>(Child) ||
- isa<OMPLoopTransformationDirective>(Child);
- };
- auto isLoopGeneratingStmt = [](auto *Child) {
- return isa<OMPLoopTransformationDirective>(Child);
- };
-
+ QualType BaseInductionVarType;
// Helper Lambda to handle storing initialization and body statements for both
// ForStmt and CXXForRangeStmt and checks for any possible mismatch between
// induction variables types
- QualType BaseInductionVarType;
auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType,
this, &Context](Stmt *LoopStmt) {
if (auto *For = dyn_cast<ForStmt>(LoopStmt)) {
@@ -14324,33 +14350,35 @@ bool SemaOpenMP::checkTransformableLoopSequence(
}
}
}
-
} else {
- assert(isa<CXXForRangeStmt>(LoopStmt) &&
- "Expected canonical for or range-based for loops.");
- auto *CXXFor = dyn_cast<CXXForRangeStmt>(LoopStmt);
+ auto *CXXFor = cast<CXXForRangeStmt>(LoopStmt);
OriginalInits.back().push_back(CXXFor->getBeginStmt());
ForStmts.push_back(CXXFor);
}
};
+
// Helper lambda functions to encapsulate the processing of different
// derivations of the canonical loop sequence grammar
//
// Modularized code for handling loop generation and transformations
- auto handleLoopGeneration = [&storeLoopStatements, &LoopHelpers,
- &OriginalInits, &LoopSeqSize, &NumLoops, Kind,
- &TmpDSA, &OnTransformationCallback,
- this](Stmt *Child) {
+ auto analyzeLoopGeneration = [&storeLoopStatements, &LoopHelpers,
+ &OriginalInits, &TransformsPreInits,
+ &LoopCategories, &LoopSeqSize, &NumLoops, Kind,
+ &TmpDSA, &ForStmts, &Context,
+ &LoopSequencePreInits, this](Stmt *Child) {
auto LoopTransform = dyn_cast<OMPLoopTransformationDirective>(Child);
Stmt *TransformedStmt = LoopTransform->getTransformedStmt();
unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests();
-
+ unsigned NumGeneratedLoops = LoopTransform->getNumGeneratedLoops();
// Handle the case where transformed statement is not available due to
// dependent contexts
if (!TransformedStmt) {
- if (NumGeneratedLoopNests > 0)
+ if (NumGeneratedLoopNests > 0) {
+ LoopSeqSize += NumGeneratedLoopNests;
+ NumLoops += NumGeneratedLoops;
return true;
- // Unroll full
+ }
+ // Unroll full (0 loops produced)
else {
Diag(Child->getBeginLoc(), diag::err_omp_not_for)
<< 0 << getOpenMPDirectiveName(Kind);
@@ -14363,38 +14391,56 @@ bool SemaOpenMP::checkTransformableLoopSequence(
Diag(Child->getBeginLoc(), diag::err_omp_not_for)
<< 0 << getOpenMPDirectiveName(Kind);
return false;
- // Future loop transformations that generate multiple canonical loops
- } else if (NumGeneratedLoopNests > 1) {
- llvm_unreachable("Multiple canonical loop generating transformations "
- "like loop splitting are not yet supported");
}
+ // Loop transformations such as split or loopranged fuse
+ else if (NumGeneratedLoopNests > 1) {
+ // Get the preinits related to this loop sequence generating
+ // loop transformation (i.e loopranged fuse, split...)
+ LoopSequencePreInits.emplace_back();
+ // These preinits differ slightly from regular inits/pre-inits related
+ // to single loop generating loop transformations (interchange, unroll)
+ // given that they are not bound to a particular loop nest
+ // so they need to be treated independently
+ updatePreInits(LoopTransform, LoopSequencePreInits);
+ return analyzeLoopSequence(TransformedStmt, LoopSeqSize, NumLoops,
+ LoopHelpers, ForStmts, OriginalInits,
+ TransformsPreInits, LoopSequencePreInits,
+ LoopCategories, Context, Kind);
+ }
+ // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all)
+ else {
+ // Process the transformed loop statement
+ OriginalInits.emplace_back();
+ TransformsPreInits.emplace_back();
+ LoopHelpers.emplace_back();
+ LoopCategories.push_back(OMPLoopCategory::TransformSingleLoop);
+
+ unsigned IsCanonical =
+ checkOpenMPLoop(Kind, nullptr, nullptr, TransformedStmt, SemaRef,
+ *DSAStack, TmpDSA, LoopHelpers[LoopSeqSize]);
+
+ if (!IsCanonical) {
+ Diag(TransformedStmt->getBeginLoc(), diag::err_omp_not_canonical_loop)
+ << getOpenMPDirectiveName(Kind);
+ return false;
+ }
+ storeLoopStatements(TransformedStmt);
+ updatePreInits(LoopTransform, TransformsPreInits);
- // Process the transformed loop statement
- Child = TransformedStmt;
- OriginalInits.emplace_back();
- LoopHelpers.emplace_back();
- OnTransformationCallback(LoopTransform);
-
- unsigned IsCanonical =
- checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack,
- TmpDSA, LoopHelpers[LoopSeqSize]);
-
- if (!IsCanonical) {
- Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop)
- << getOpenMPDirectiveName(Kind);
- return false;
+ NumLoops += NumGeneratedLoops;
+ ++LoopSeqSize;
+ return true;
}
- storeLoopStatements(TransformedStmt);
- NumLoops += LoopTransform->getNumGeneratedLoops();
- return true;
};
// Modularized code for handling regular canonical loops
- auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits,
- &LoopSeqSize, &NumLoops, Kind, &TmpDSA,
- this](Stmt *Child) {
+ auto analyzeRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits,
+ &LoopSeqSize, &NumLoops, Kind, &TmpDSA,
+ &LoopCategories, this](Stmt *Child) {
OriginalInits.emplace_back();
LoopHelpers.emplace_back();
+ LoopCategories.push_back(OMPLoopCategory::RegularLoop);
+
unsigned IsCanonical =
checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack,
TmpDSA, LoopHelpers[LoopSeqSize]);
@@ -14412,57 +14458,114 @@ bool SemaOpenMP::checkTransformableLoopSequence(
return true;
};
- // Helper function to process a Loop Sequence Recursively
- auto handleLoopSequence = [&](Stmt *LoopSeqStmt,
- auto &handleLoopSequenceCallback) -> bool {
- for (auto *Child : LoopSeqStmt->children()) {
- if (!Child)
- continue;
+ // Helper functions to validate canonical loop sequence grammar is valid
+ auto isLoopSequenceDerivation = [](auto *Child) {
+ return isa<ForStmt>(Child) || isa<CXXForRangeStmt>(Child) ||
+ isa<OMPLoopTransformationDirective>(Child);
+ };
+ auto isLoopGeneratingStmt = [](auto *Child) {
+ return isa<OMPLoopTransformationDirective>(Child);
+ };
+
- // Skip over non-loop-sequence statements
- if (!isLoopSequenceDerivation(Child)) {
- Child = Child->IgnoreContainers();
+ // High level grammar validation
+ for (auto *Child : LoopSeqStmt->children()) {
- // Ignore empty compound statement
if (!Child)
- continue;
+ continue;
- // In the case of a nested loop sequence ignoring containers would not
- // be enough, a recurisve transversal of the loop sequence is required
- if (isa<CompoundStmt>(Child)) {
- if (!handleLoopSequenceCallback(Child, handleLoopSequenceCallback))
- return false;
- // Already been treated, skip this children
- continue;
+ // Skip over non-loop-sequence statements
+ if (!isLoopSequenceDerivation(Child)) {
+ Child = Child->IgnoreContainers();
+
+ // Ignore empty compound statement
+ if (!Child)
+ continue;
+
+ // In the case of a nested loop sequence ignoring containers would not
+ // be enough, a recursive traversal of the loop sequence is required
+ if (isa<CompoundStmt>(Child)) {
+ if (!analyzeLoopSequence(Child, LoopSeqSize, NumLoops, LoopHelpers,
+ ForStmts, OriginalInits, TransformsPreInits,
+ LoopSequencePreInits, LoopCategories, Context,
+ Kind))
+ return false;
+ // Already been treated, skip this child
+ continue;
+ }
+ }
+ // Regular loop sequence handling
+ if (isLoopSequenceDerivation(Child)) {
+ if (isLoopGeneratingStmt(Child)) {
+ if (!analyzeLoopGeneration(Child)) {
+ return false;
}
+ // analyzeLoopGeneration updates Loop Sequence size accordingly
+
+ } else {
+ if (!analyzeRegularLoop(Child)) {
+ return false;
+ }
+ // Update the Loop Sequence size by one
+ ++LoopSeqSize;
}
- // Regular loop sequence handling
- if (isLoopSequenceDerivation(Child)) {
- if (isLoopGeneratingStmt(Child)) {
- if (!handleLoopGeneration(Child)) {
- return false;
- }
} else {
- if (!handleRegularLoop(Child)) {
- return false;
- }
+ // Report error for invalid statement inside canonical loop sequence
+ Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+ << 0 << getOpenMPDirectiveName(Kind);
+ return false;
}
- ++LoopSeqSize;
- } else {
- // Report error for invalid statement inside canonical loop sequence
- Diag(Child->getBeginLoc(), diag::err_omp_not_for)
- << 0 << getOpenMPDirectiveName(Kind);
+ }
+ return true;
+}
+
+bool SemaOpenMP::checkTransformableLoopSequence(
+ OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize,
+ unsigned &NumLoops,
+ SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+ SmallVectorImpl<Stmt *> &ForStmts,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+ SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context) {
+
+ // Checks whether the given statement is a compound statement
+ if (!isa<CompoundStmt>(AStmt)) {
+ Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
+ << getOpenMPDirectiveName(Kind);
return false;
- }
- }
- return true;
- };
+ }
+ // Number of top level canonical loop nests observed (And acts as index)
+ LoopSeqSize = 0;
+ // Number of total observed loops
+ NumLoops = 0;
+
+ // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows
+ // the grammar:
+ //
+ // canonical-loop-sequence:
+ // {
+ // loop-sequence+
+ // }
+ // where loop-sequence can be any of the following:
+ // 1. canonical-loop-sequence
+ // 2. loop-nest
+ // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective)
+ //
+ // To recognise and traverse this structure the following helper functions
+ // have been defined. analyzeLoopSequence serves as the recursive entry point
+ // and tries to match the input AST to the canonical loop sequence grammar
+ // structure. This function will perform both a semantic and syntactical
+ // analysis of the given statement according to OpenMP 6.0 definition of
+ // the aforementioned canonical loop sequence
// Recursive entry point to process the main loop sequence
- if (!handleLoopSequence(AStmt, handleLoopSequence)) {
- return false;
+ if (!analyzeLoopSequence(AStmt, LoopSeqSize, NumLoops, LoopHelpers, ForStmts,
+ OriginalInits, TransformsPreInits,
+ LoopSequencePreInits, LoopCategories, Context,
+ Kind)) {
+ return false;
}
-
if (LoopSeqSize <= 0) {
Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence)
<< getOpenMPDirectiveName(Kind);
@@ -14494,9 +14597,7 @@ static void addLoopPreInits(ASTContext &Context,
RangeEnd->getBeginLoc(),
RangeEnd->getEndLoc()));
}
-
llvm::append_range(PreInits, OriginalInit);
-
// List of OMPCapturedExprDecl, for __begin, __end, and NumIterations
if (auto *PI = cast_or_null<DeclStmt>(LoopHelper.PreInits)) {
PreInits.push_back(new (Context) DeclStmt(
@@ -15177,7 +15278,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
Stmt *LoopStmt = nullptr;
collectLoopStmts(AStmt, {LoopStmt});
- // Determine the PreInit declarations.
+ // Determine the PreInit declarations.
SmallVector<Stmt *, 4> PreInits;
addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits);
@@ -15744,28 +15845,35 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
if (!AStmt) {
return StmtError();
}
+
+ unsigned NumLoops = 1;
+ unsigned LoopSeqSize = 1;
+
+ // Defer transformation in dependent contexts
+ // The NumLoopNests argument is set to a placeholder 1 (even though
+ // using looprange fuse could yield up to 3 top level loop nests)
+ // because a dependent context could prevent determining its true value
+ if (CurrContext->isDependentContext()) {
+ return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
+ NumLoops, LoopSeqSize, AStmt, nullptr,
+ nullptr);
+ }
+
// Validate that the potential loop sequence is transformable for fusion
// Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops
SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers;
SmallVector<Stmt *> LoopStmts;
SmallVector<SmallVector<Stmt *, 0>> OriginalInits;
-
- unsigned NumLoops;
- unsigned LoopSeqSize;
+ SmallVector<SmallVector<Stmt *, 0>> TransformsPreInits;
+ SmallVector<SmallVector<Stmt *, 0>> LoopSequencePreInits;
+ SmallVector<OMPLoopCategory, 0> LoopCategories;
if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops,
LoopHelpers, LoopStmts, OriginalInits,
- Context)) {
+ TransformsPreInits, LoopSequencePreInits,
+ LoopCategories, Context)) {
return StmtError();
}
- // Defer transformation in dependent contexts
- // The NumLoopNests argument is set to a placeholder (0)
- // because a dependent context could prevent determining its true value
- if (CurrContext->isDependentContext()) {
- return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
- NumLoops, 0, AStmt, nullptr, nullptr);
- }
-
// Handle clauses, which can be any of the following: [looprange, apply]
const OMPLoopRangeClause *LRC =
OMPExecutableDirective::getSingleClause<OMPLoopRangeClause>(Clauses);
@@ -15827,11 +15935,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
"Expecting loop iteration space dimensionality to match number of "
"affected loops");
- // PreInits hold a sequence of variable declarations that must be executed
- // before the fused loop begins. These include bounds, strides, and other
- // helper variables required for the transformation.
- SmallVector<Stmt *> PreInits;
-
// Select the type with the largest bit width among all induction variables
QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType();
for (unsigned int I = FirstVal; I < LastVal; ++I) {
@@ -15843,7 +15946,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
uint64_t IVBitWidth = Context.getIntWidth(IVType);
// Create pre-init declarations for all loops lower bounds, upper bounds,
- // strides and num-iterations
+ // strides and num-iterations for every top level loop in the fusion
SmallVector<VarDecl *, 4> LBVarDecls;
SmallVector<VarDecl *, 4> STVarDecls;
SmallVector<VarDecl *, 4> NIVarDecls;
@@ -15881,12 +15984,62 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
return std::make_pair(VD, DeclStmt);
};
+ // PreInits hold a sequence of variable declarations that must be executed
+ // before the fused loop begins. These include bounds, strides, and other
+ // helper variables required for the transformation. Other loop transforms
+ // also contain their own preinits
+ SmallVector<Stmt *> PreInits;
+ // Iterator to keep track of loop transformations
+ unsigned int TransformIndex = 0;
+
+ // Update the general preinits using the preinits generated by loop sequence
+ // generating loop transformations. These preinits differ slightly from
+ // single-loop transformation preinits, as they can be detached from a
+ // specific loop inside the multiple generated loop nests. This happens
+ // because certain helper variables, like '.omp.fuse.max', are introduced to
+ // handle fused iteration spaces and may not be directly tied to a single
+ // original loop. The preinit structure must ensure that hidden variables
+ // like '.omp.fuse.max' are still properly handled.
+ // Transformations that apply this concept: Loopranged Fuse, Split
+ if (!LoopSequencePreInits.empty()) {
+ for (const auto &LTPreInits : LoopSequencePreInits) {
+ if (!LTPreInits.empty()) {
+ llvm::append_range(PreInits, LTPreInits);
+ }
+ }
+ }
+
// Process each single loop to generate and collect declarations
- // and statements for all helper expressions
+ // and statements for all helper expressions related to
+ // particular single loop nests
+
+ // Also, in the case of the fused loops, we keep track of their original
+ // inits by appending them to their preinits statement, and in the case of
+ // transformations, also append their preinits (which contain the original
+ // loop initialization statement or other statements)
+
+ // Firstly we need to update TransformIndex to match the beginning of the
+ // looprange section
+ for (unsigned int I = 0; I < FirstVal - 1; ++I) {
+ if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop)
+ ++TransformIndex;
+ }
for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
- addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
- PreInits);
+ if (LoopCategories[I] == OMPLoopCategory::RegularLoop) {
+ addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
+ PreInits);
+ } else if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) {
+ // For transformed loops, insert both pre-inits and original inits.
+ // Order matters: pre-inits may define variables used in the original
+ // inits such as upper bounds...
+ auto TransformPreInit = TransformsPreInits[TransformIndex++];
+ if (!TransformPreInit.empty()) {
+ llvm::append_range(PreInits, TransformPreInit);
+ }
+ addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
+ PreInits);
+ }
auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", J);
auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", J);
auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", J);
@@ -15905,7 +16058,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
NIVarDecls.push_back(NIVD);
IVVarDecls.push_back(IVVD);
- PreInits.push_back(UBDStmt.get());
PreInits.push_back(LBDStmt.get());
PreInits.push_back(STDStmt.get());
PreInits.push_back(NIDStmt.get());
@@ -16081,6 +16233,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
BodyStmts.push_back(IdxExpr.get());
llvm::append_range(BodyStmts, LoopHelpers[I].Updates);
+ // If the loop is a CXXForRangeStmt then the iterator variable is needed
if (auto *SourceCXXFor = dyn_cast<CXXForRangeStmt>(LoopStmts[I]))
BodyStmts.push_back(SourceCXXFor->getLoopVarStmt());
@@ -16115,21 +16268,50 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(),
IncrExpr.get()->getEndLoc());
- // In the case of looprange, the result of fuse won't simply
- // be a single loop (ForStmt), but rather a loop sequence
- // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop
- // and the post-fusion loops, preserving its original order.
+ // In the case of looprange, the result of fuse won't simply
+ // be a single loop (ForStmt), but rather a loop sequence
+ // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop
+ // and the post-fusion loops, preserving its original order.
+ //
+ // Note: If looprange clause produces a single fused loop nest then
+ // this compound statement wrapper is unnecessary (Therefore this
+ // treatment is skipped)
+
Stmt *FusionStmt = FusedForStmt;
- if (LRC) {
+ if (LRC && CountVal != LoopSeqSize) {
SmallVector<Stmt *, 4> FinalLoops;
- // Gather all the pre-fusion loops
- for (unsigned I = 0; I < FirstVal - 1; ++I)
- FinalLoops.push_back(LoopStmts[I]);
- // Gather the fused loop
- FinalLoops.push_back(FusedForStmt);
- // Gather all the post-fusion loops
- for (unsigned I = FirstVal + CountVal - 1; I < LoopSeqSize; ++I)
+ // Reset the transform index
+ TransformIndex = 0;
+
+ // Collect all non-fused loops before and after the fused region.
+ // Pre-fusion and post-fusion loops are inserted in order exploiting their
+ // symmetry, along with their corresponding transformation pre-inits if
+ // needed. The fused loop is added between the two regions.
+ for (unsigned I = 0; I < LoopSeqSize; ++I) {
+ if (I >= FirstVal - 1 && I < FirstVal + CountVal - 1) {
+ // Advance TransformIndex past transformations inside the fused range.
+ // NOTE(review): `!=` below looks inverted; `==` would match this intent.
+ if (LoopCategories[I] != OMPLoopCategory::TransformSingleLoop)
+ ++TransformIndex;
+ continue;
+ }
+
+ // No need to handle:
+ // Regular loops: they are kept intact as-is.
+ // Loop-sequence-generating transformations: already handled earlier.
+ // Only TransformSingleLoop requires inserting pre-inits here
+
+ if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) {
+ auto TransformPreInit = TransformsPreInits[TransformIndex++];
+ if (!TransformPreInit.empty()) {
+ llvm::append_range(PreInits, TransformPreInit);
+ }
+ }
+
FinalLoops.push_back(LoopStmts[I]);
+ }
+
+ FinalLoops.insert(FinalLoops.begin() + (FirstVal - 1), FusedForStmt);
FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(),
SourceLocation(), SourceLocation());
}
diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp
index ac4f0d38a9c68..9d85bd1172948 100644
--- a/clang/test/OpenMP/fuse_ast_print.cpp
+++ b/clang/test/OpenMP/fuse_ast_print.cpp
@@ -338,6 +338,61 @@ void tfoo9() {
foo9<1, 2>();
}
+// PRINT-LABEL: void foo10(
+// DUMP-LABEL: FunctionDecl {{.*}} foo10
+void foo10() {
+ // PRINT: #pragma omp fuse looprange(2,2)
+ // DUMP: OMPFuseDirective
+ // DUMP: OMPLooprangeClause
+ #pragma omp fuse looprange(2,2)
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int ii = 0; ii < 10; ii += 2)
+ // DUMP: ForStmt
+ for (int ii = 0; ii < 10; ii += 2)
+ // PRINT: body(ii)
+ // DUMP: CallExpr
+ body(ii);
+ // PRINT: #pragma omp fuse looprange(2,2)
+ // DUMP: OMPFuseDirective
+ // DUMP: OMPLooprangeClause
+ #pragma omp fuse looprange(2,2)
+ {
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ // PRINT: for (int jj = 10; jj > 0; --jj)
+ // DUMP: ForStmt
+ for (int jj = 10; jj > 0; --jj)
+ // PRINT: body(jj)
+ // DUMP: CallExpr
+ body(jj);
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+ // PRINT: for (int kk = 0; kk <= 10; ++kk)
+ // DUMP: ForStmt
+ for (int kk = 0; kk <= 10; ++kk)
+ // PRINT: body(kk)
+ // DUMP: CallExpr
+ body(kk);
+ }
+ }
+
+}
diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp
index d9500bed3ce31..742c280ed0172 100644
--- a/clang/test/OpenMP/fuse_codegen.cpp
+++ b/clang/test/OpenMP/fuse_codegen.cpp
@@ -65,6 +65,23 @@ extern "C" void foo4() {
}
}
+// This exemplifies the usage of loop transformations that generate more
+// than one top level canonical loop nest (e.g. split, loopranged fuse...)
+extern "C" void foo5() {
+ double arr[256];
+ #pragma omp fuse looprange(2,2)
+ {
+ #pragma omp fuse looprange(2,2)
+ {
+ for(int i = 0; i < 128; ++i) body(i);
+ for(int j = 0; j < 256; j+=2) body(j);
+ for(int k = 0; k < 512; ++k) body(k);
+ }
+ for(int c = 42; auto &&v: arr) body(c,v);
+ for(int cc = 37; auto &&vv: arr) body(cc, vv);
+ }
+}
+
#endif
// CHECK1-LABEL: define dso_local void @body(
@@ -88,7 +105,6 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
@@ -97,7 +113,6 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -129,107 +144,103 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1
// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]]
// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
// CHECK1: [[COND_TRUE]]:
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
// CHECK1-NEXT: br label %[[COND_END:.*]]
// CHECK1: [[COND_FALSE]]:
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
// CHECK1-NEXT: br label %[[COND_END]]
// CHECK1: [[COND_END]]:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK1-NEXT: br label %[[FOR_COND:.*]]
// CHECK1: [[FOR_COND]]:
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
-// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]]
// CHECK1-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
// CHECK1: [[FOR_BODY]]:
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
// CHECK1-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
// CHECK1: [[IF_THEN]]:
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]]
-// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]]
+// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]]
// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
-// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]]
// CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP35]])
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP33]])
// CHECK1-NEXT: br label %[[IF_END]]
// CHECK1: [[IF_END]]:
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]]
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]]
// CHECK1-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
// CHECK1: [[IF_THEN22]]:
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]]
-// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]]
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]]
+// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]]
// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]]
-// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]]
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]]
+// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]]
// CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP42]])
// CHECK1-NEXT: br label %[[IF_END27]]
// CHECK1: [[IF_END27]]:
// CHECK1-NEXT: br label %[[FOR_INC:.*]]
// CHECK1: [[FOR_INC]]:
-// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK1: [[FOR_END]]:
@@ -256,7 +267,6 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
@@ -265,7 +275,6 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -274,7 +283,6 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4
@@ -304,172 +312,166 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1
// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]]
// CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]]
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]]
// CHECK1-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1
// CHECK1-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK1-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1
// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]]
// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
// CHECK1: [[COND_TRUE]]:
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
// CHECK1-NEXT: br label %[[COND_END:.*]]
// CHECK1: [[COND_FALSE]]:
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
// CHECK1-NEXT: br label %[[COND_END]]
// CHECK1: [[COND_END]]:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
-// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]]
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]]
// CHECK1-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
// CHECK1: [[COND_TRUE30]]:
-// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
// CHECK1-NEXT: br label %[[COND_END32:.*]]
// CHECK1: [[COND_FALSE31]]:
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
// CHECK1-NEXT: br label %[[COND_END32]]
// CHECK1: [[COND_END32]]:
-// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ]
+// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ]
// CHECK1-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK1-NEXT: br label %[[FOR_COND:.*]]
// CHECK1: [[FOR_COND]]:
-// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
-// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]]
// CHECK1-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
// CHECK1: [[FOR_BODY]]:
-// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]]
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]]
// CHECK1-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
// CHECK1: [[IF_THEN]]:
-// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]]
-// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]]
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]]
+// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]]
// CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
-// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]]
-// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]]
+// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]]
// CHECK1-NEXT: store i32 [[ADD38]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP49]])
// CHECK1-NEXT: br label %[[IF_END]]
// CHECK1: [[IF_END]]:
-// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]]
// CHECK1-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
// CHECK1: [[IF_THEN40]]:
-// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]]
-// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]]
+// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]]
// CHECK1-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]]
-// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]]
+// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]]
// CHECK1-NEXT: store i32 [[SUB44]], ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP61]])
+// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP58]])
// CHECK1-NEXT: br label %[[IF_END45]]
// CHECK1: [[IF_END45]]:
-// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
-// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]]
+// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]]
// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
// CHECK1: [[IF_THEN47]]:
-// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
-// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
-// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]]
-// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]]
+// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]]
+// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]]
// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
-// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
-// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]]
-// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]]
+// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]]
// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
-// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP70]])
+// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP67]])
// CHECK1-NEXT: br label %[[IF_END52]]
// CHECK1: [[IF_END52]]:
// CHECK1-NEXT: br label %[[FOR_INC:.*]]
// CHECK1: [[FOR_INC]]:
-// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1
+// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK1: [[FOR_END]]:
@@ -481,13 +483,11 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -497,48 +497,43 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[__END224:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END222:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN225:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4
@@ -565,225 +560,219 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
-// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8
+// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
// CHECK1-NEXT: store i32 42, ptr [[C]], align 4
// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK1-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8
-// CHECK1-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
-// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64
+// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
-// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
-// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1
-// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1
-// CHECK1-NEXT: [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1
-// CHECK1-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8
-// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8
-// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1
-// CHECK1-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4
-// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK1-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
-// CHECK1-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256
-// CHECK1-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK1-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8
-// CHECK1-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK1-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64
-// CHECK1-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64
-// CHECK1-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]]
-// CHECK1-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8
-// CHECK1-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1
-// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1
-// CHECK1-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1
-// CHECK1-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1
-// CHECK1-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8
+// CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64
+// CHECK1-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK1-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]]
+// CHECK1-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8
+// CHECK1-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1
+// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1
+// CHECK1-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1
+// CHECK1-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1
+// CHECK1-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8
// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8
// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK1-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1
-// CHECK1-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
-// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK1-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]]
-// CHECK1-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]]
-// CHECK1: [[COND_TRUE44]]:
-// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK1-NEXT: br label %[[COND_END46:.*]]
-// CHECK1: [[COND_FALSE45]]:
-// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK1-NEXT: br label %[[COND_END46]]
-// CHECK1: [[COND_END46]]:
-// CHECK1-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ]
-// CHECK1-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK1-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]]
-// CHECK1-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]]
-// CHECK1: [[COND_TRUE50]]:
-// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK1-NEXT: br label %[[COND_END52:.*]]
-// CHECK1: [[COND_FALSE51]]:
-// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK1-NEXT: br label %[[COND_END52]]
-// CHECK1: [[COND_END52]]:
-// CHECK1-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ]
-// CHECK1-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8
-// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1
+// CHECK1-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]]
+// CHECK1-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]]
+// CHECK1: [[COND_TRUE42]]:
+// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT: br label %[[COND_END44:.*]]
+// CHECK1: [[COND_FALSE43]]:
+// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: br label %[[COND_END44]]
+// CHECK1: [[COND_END44]]:
+// CHECK1-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ]
+// CHECK1-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]]
+// CHECK1: [[COND_TRUE48]]:
+// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: br label %[[COND_END50:.*]]
+// CHECK1: [[COND_FALSE49]]:
+// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: br label %[[COND_END50]]
+// CHECK1: [[COND_END50]]:
+// CHECK1-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ]
+// CHECK1-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8
// CHECK1-NEXT: br label %[[FOR_COND:.*]]
// CHECK1: [[FOR_COND]]:
-// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8
-// CHECK1-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]]
-// CHECK1-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK1-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
// CHECK1: [[FOR_BODY]]:
-// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
-// CHECK1-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]]
-// CHECK1-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]]
+// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]]
+// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]]
// CHECK1: [[IF_THEN]]:
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4
-// CHECK1-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4
-// CHECK1-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64
-// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]]
-// CHECK1-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]]
-// CHECK1-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32
-// CHECK1-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4
-// CHECK1-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1
-// CHECK1-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]]
-// CHECK1-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]]
-// CHECK1-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]]
-// CHECK1: [[IF_THEN64]]:
-// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]]
-// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]]
-// CHECK1-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1
-// CHECK1-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]]
-// CHECK1-NEXT: store i32 [[ADD68]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP48]])
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64
+// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]]
+// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]]
+// CHECK1-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32
+// CHECK1-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1
+// CHECK1-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]]
+// CHECK1-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN62]]:
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]]
+// CHECK1-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1
+// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]]
+// CHECK1-NEXT: store i32 [[ADD66]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP45]])
// CHECK1-NEXT: br label %[[IF_END]]
// CHECK1: [[IF_END]]:
-// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]]
-// CHECK1-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]]
-// CHECK1: [[IF_THEN70]]:
-// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]]
-// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]]
-// CHECK1-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2
-// CHECK1-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]]
-// CHECK1-NEXT: store i32 [[ADD74]], ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP55]])
-// CHECK1-NEXT: br label %[[IF_END75]]
-// CHECK1: [[IF_END75]]:
-// CHECK1-NEXT: br label %[[IF_END76]]
-// CHECK1: [[IF_END76]]:
-// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK1-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]]
-// CHECK1-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]]
-// CHECK1: [[IF_THEN78]]:
-// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8
-// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8
-// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]]
-// CHECK1-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]]
-// CHECK1-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8
-// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8
-// CHECK1-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1
-// CHECK1-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]]
-// CHECK1-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8
-// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
-// CHECK1-NEXT: store ptr [[TMP63]], ptr [[V]], align 8
-// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4
-// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8
-// CHECK1-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP64]], double noundef [[TMP66]])
-// CHECK1-NEXT: br label %[[IF_END83]]
-// CHECK1: [[IF_END83]]:
-// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK1-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]]
-// CHECK1-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]]
-// CHECK1: [[IF_THEN85]]:
-// CHECK1-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
-// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
-// CHECK1-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]]
-// CHECK1-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]]
-// CHECK1-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8
-// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK1-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
-// CHECK1-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1
-// CHECK1-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]]
-// CHECK1-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8
-// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8
-// CHECK1-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8
-// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4
-// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8
-// CHECK1-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP75]], double noundef [[TMP77]])
-// CHECK1-NEXT: br label %[[IF_END90]]
-// CHECK1: [[IF_END90]]:
+// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
+// CHECK1-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]]
+// CHECK1: [[IF_THEN68]]:
+// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]]
+// CHECK1-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]]
+// CHECK1-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2
+// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]]
+// CHECK1-NEXT: store i32 [[ADD72]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK1-NEXT: br label %[[IF_END73]]
+// CHECK1: [[IF_END73]]:
+// CHECK1-NEXT: br label %[[IF_END74]]
+// CHECK1: [[IF_END74]]:
+// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]]
+// CHECK1: [[IF_THEN76]]:
+// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]]
+// CHECK1-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1
+// CHECK1-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]]
+// CHECK1-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: store ptr [[TMP60]], ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]])
+// CHECK1-NEXT: br label %[[IF_END81]]
+// CHECK1: [[IF_END81]]:
+// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]]
+// CHECK1-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]]
+// CHECK1: [[IF_THEN83]]:
+// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]]
+// CHECK1-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]]
+// CHECK1-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1
+// CHECK1-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]]
+// CHECK1-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]])
+// CHECK1-NEXT: br label %[[IF_END88]]
+// CHECK1: [[IF_END88]]:
// CHECK1-NEXT: br label %[[FOR_INC:.*]]
// CHECK1: [[FOR_INC]]:
-// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1
-// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1
+// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8
// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
// CHECK1: [[FOR_END]]:
// CHECK1-NEXT: ret void
@@ -794,13 +783,11 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -815,12 +802,10 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
// CHECK1-NEXT: store i32 0, ptr [[K]], align 4
-// CHECK1-NEXT: store i32 63, ptr [[DOTOMP_UB1]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
// CHECK1-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4
@@ -940,6 +925,277 @@ extern "C" void foo4() {
// CHECK1-NEXT: ret void
//
//
+// CHECK1-LABEL: define dso_local void @foo5(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END267:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
+// CHECK1-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]]
+// CHECK1: [[COND_TRUE24]]:
+// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT: br label %[[COND_END26:.*]]
+// CHECK1: [[COND_FALSE25]]:
+// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: br label %[[COND_END26]]
+// CHECK1: [[COND_END26]]:
+// CHECK1-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ]
+// CHECK1-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128
+// CHECK1-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP21]])
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND30:.*]]
+// CHECK1: [[FOR_COND30]]:
+// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK1-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]]
+// CHECK1-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]]
+// CHECK1: [[FOR_BODY32]]:
+// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64
+// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]]
+// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]]
+// CHECK1-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
+// CHECK1-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1
+// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]]
+// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN41]]:
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]]
+// CHECK1-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2
+// CHECK1-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]]
+// CHECK1-NEXT: store i32 [[ADD45]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP37]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]]
+// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK1: [[IF_THEN47]]:
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]]
+// CHECK1-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]]
+// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1
+// CHECK1-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]]
+// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK1-NEXT: br label %[[IF_END52]]
+// CHECK1: [[IF_END52]]:
+// CHECK1-NEXT: br label %[[IF_END53]]
+// CHECK1: [[IF_END53]]:
+// CHECK1-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]]
+// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]]
+// CHECK1: [[IF_THEN55]]:
+// CHECK1-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]]
+// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]]
+// CHECK1-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1
+// CHECK1-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]]
+// CHECK1-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: store ptr [[TMP52]], ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]])
+// CHECK1-NEXT: br label %[[IF_END60]]
+// CHECK1: [[IF_END60]]:
+// CHECK1-NEXT: br label %[[FOR_INC61:.*]]
+// CHECK1: [[FOR_INC61]]:
+// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1
+// CHECK1-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK1: [[FOR_END63]]:
+// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8
+// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND70:.*]]
+// CHECK1: [[FOR_COND70]]:
+// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8
+// CHECK1-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]]
+// CHECK1-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]]
+// CHECK1: [[FOR_BODY72]]:
+// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]])
+// CHECK1-NEXT: br label %[[FOR_INC73:.*]]
+// CHECK1: [[FOR_INC73]]:
+// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1
+// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND70]]
+// CHECK1: [[FOR_END74]]:
+// CHECK1-NEXT: ret void
+//
+//
// CHECK2-LABEL: define dso_local void @body(
// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: [[ENTRY:.*:]]
@@ -961,7 +1217,6 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
@@ -970,7 +1225,6 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -1002,107 +1256,103 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1
// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]]
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]]
// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
// CHECK2: [[COND_TRUE]]:
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
// CHECK2-NEXT: br label %[[COND_END:.*]]
// CHECK2: [[COND_FALSE]]:
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
// CHECK2-NEXT: br label %[[COND_END]]
// CHECK2: [[COND_END]]:
-// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ]
// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK2-NEXT: br label %[[FOR_COND:.*]]
// CHECK2: [[FOR_COND]]:
-// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
-// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]]
// CHECK2-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
// CHECK2: [[FOR_BODY]]:
-// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
// CHECK2-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
// CHECK2: [[IF_THEN]]:
-// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]]
-// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]]
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]]
+// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]]
// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
-// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]]
-// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]]
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]]
// CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP35]])
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP33]])
// CHECK2-NEXT: br label %[[IF_END]]
// CHECK2: [[IF_END]]:
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]]
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]]
// CHECK2-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
// CHECK2: [[IF_THEN22]]:
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]]
-// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]]
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]]
+// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]]
// CHECK2-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]]
-// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]]
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]]
+// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]]
// CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP42]])
// CHECK2-NEXT: br label %[[IF_END27]]
// CHECK2: [[IF_END27]]:
// CHECK2-NEXT: br label %[[FOR_INC:.*]]
// CHECK2: [[FOR_INC]]:
-// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK2: [[FOR_END]]:
@@ -1114,13 +1364,11 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[ENTRY:.*:]]
// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -1130,48 +1378,43 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[__END224:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END222:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN225:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4
@@ -1198,225 +1441,219 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
-// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8
+// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
// CHECK2-NEXT: store i32 42, ptr [[C]], align 4
// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK2-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
-// CHECK2-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8
-// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK2-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0
-// CHECK2-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8
-// CHECK2-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8
-// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8
-// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
-// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64
+// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
-// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
-// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1
-// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1
-// CHECK2-NEXT: [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1
-// CHECK2-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8
-// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8
-// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8
-// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1
-// CHECK2-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4
-// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8
-// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK2-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
-// CHECK2-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256
-// CHECK2-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8
-// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0
-// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8
-// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK2-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0
-// CHECK2-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8
-// CHECK2-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8
-// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8
-// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK2-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64
-// CHECK2-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64
-// CHECK2-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]]
-// CHECK2-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8
-// CHECK2-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1
-// CHECK2-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1
-// CHECK2-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1
-// CHECK2-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1
-// CHECK2-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK2-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8
+// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8
+// CHECK2-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK2-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]]
+// CHECK2-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8
+// CHECK2-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1
+// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1
+// CHECK2-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1
+// CHECK2-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1
+// CHECK2-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8
// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8
// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8
-// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK2-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1
-// CHECK2-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8
-// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
-// CHECK2-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK2-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]]
-// CHECK2-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]]
-// CHECK2: [[COND_TRUE44]]:
-// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK2-NEXT: br label %[[COND_END46:.*]]
-// CHECK2: [[COND_FALSE45]]:
-// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK2-NEXT: br label %[[COND_END46]]
-// CHECK2: [[COND_END46]]:
-// CHECK2-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ]
-// CHECK2-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK2-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]]
-// CHECK2-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]]
-// CHECK2: [[COND_TRUE50]]:
-// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK2-NEXT: br label %[[COND_END52:.*]]
-// CHECK2: [[COND_FALSE51]]:
-// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK2-NEXT: br label %[[COND_END52]]
-// CHECK2: [[COND_END52]]:
-// CHECK2-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ]
-// CHECK2-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8
-// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1
+// CHECK2-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]]
+// CHECK2: [[COND_TRUE42]]:
+// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT: br label %[[COND_END44:.*]]
+// CHECK2: [[COND_FALSE43]]:
+// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: br label %[[COND_END44]]
+// CHECK2: [[COND_END44]]:
+// CHECK2-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ]
+// CHECK2-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]]
+// CHECK2: [[COND_TRUE48]]:
+// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: br label %[[COND_END50:.*]]
+// CHECK2: [[COND_FALSE49]]:
+// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: br label %[[COND_END50]]
+// CHECK2: [[COND_END50]]:
+// CHECK2-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ]
+// CHECK2-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8
// CHECK2-NEXT: br label %[[FOR_COND:.*]]
// CHECK2: [[FOR_COND]]:
-// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8
-// CHECK2-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]]
-// CHECK2-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK2-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
// CHECK2: [[FOR_BODY]]:
-// CHECK2-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
-// CHECK2-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]]
-// CHECK2-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]]
+// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]]
+// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]]
// CHECK2: [[IF_THEN]]:
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4
-// CHECK2-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4
-// CHECK2-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64
-// CHECK2-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]]
-// CHECK2-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]]
-// CHECK2-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32
-// CHECK2-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4
-// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4
-// CHECK2-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1
-// CHECK2-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]]
-// CHECK2-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]]
-// CHECK2-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]]
-// CHECK2: [[IF_THEN64]]:
-// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]]
-// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]]
-// CHECK2-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1
-// CHECK2-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]]
-// CHECK2-NEXT: store i32 [[ADD68]], ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[I]], align 4
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP48]])
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64
+// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]]
+// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]]
+// CHECK2-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32
+// CHECK2-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1
+// CHECK2-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]]
+// CHECK2-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]]
+// CHECK2-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN62]]:
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]]
+// CHECK2-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1
+// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]]
+// CHECK2-NEXT: store i32 [[ADD66]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP45]])
// CHECK2-NEXT: br label %[[IF_END]]
// CHECK2: [[IF_END]]:
-// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]]
-// CHECK2-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]]
-// CHECK2: [[IF_THEN70]]:
-// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]]
-// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]]
-// CHECK2-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2
-// CHECK2-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]]
-// CHECK2-NEXT: store i32 [[ADD74]], ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP55]])
-// CHECK2-NEXT: br label %[[IF_END75]]
-// CHECK2: [[IF_END75]]:
-// CHECK2-NEXT: br label %[[IF_END76]]
-// CHECK2: [[IF_END76]]:
-// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK2-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]]
-// CHECK2-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]]
-// CHECK2: [[IF_THEN78]]:
-// CHECK2-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8
-// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8
-// CHECK2-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]]
-// CHECK2-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]]
-// CHECK2-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8
-// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK2-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8
-// CHECK2-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1
-// CHECK2-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]]
-// CHECK2-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8
-// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
-// CHECK2-NEXT: store ptr [[TMP63]], ptr [[V]], align 8
-// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4
-// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8
-// CHECK2-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP64]], double noundef [[TMP66]])
-// CHECK2-NEXT: br label %[[IF_END83]]
-// CHECK2: [[IF_END83]]:
-// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK2-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]]
-// CHECK2-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]]
-// CHECK2: [[IF_THEN85]]:
-// CHECK2-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
-// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
-// CHECK2-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]]
-// CHECK2-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]]
-// CHECK2-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8
-// CHECK2-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK2-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
-// CHECK2-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1
-// CHECK2-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]]
-// CHECK2-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8
-// CHECK2-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8
-// CHECK2-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8
-// CHECK2-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4
-// CHECK2-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8
-// CHECK2-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP75]], double noundef [[TMP77]])
-// CHECK2-NEXT: br label %[[IF_END90]]
-// CHECK2: [[IF_END90]]:
+// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
+// CHECK2-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]]
+// CHECK2: [[IF_THEN68]]:
+// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]]
+// CHECK2-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]]
+// CHECK2-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2
+// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]]
+// CHECK2-NEXT: store i32 [[ADD72]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK2-NEXT: br label %[[IF_END73]]
+// CHECK2: [[IF_END73]]:
+// CHECK2-NEXT: br label %[[IF_END74]]
+// CHECK2: [[IF_END74]]:
+// CHECK2-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]]
+// CHECK2: [[IF_THEN76]]:
+// CHECK2-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]]
+// CHECK2-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1
+// CHECK2-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]]
+// CHECK2-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: store ptr [[TMP60]], ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]])
+// CHECK2-NEXT: br label %[[IF_END81]]
+// CHECK2: [[IF_END81]]:
+// CHECK2-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]]
+// CHECK2-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]]
+// CHECK2: [[IF_THEN83]]:
+// CHECK2-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]]
+// CHECK2-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]]
+// CHECK2-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1
+// CHECK2-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]]
+// CHECK2-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]])
+// CHECK2-NEXT: br label %[[IF_END88]]
+// CHECK2: [[IF_END88]]:
// CHECK2-NEXT: br label %[[FOR_INC:.*]]
// CHECK2: [[FOR_INC]]:
-// CHECK2-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1
-// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1
+// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8
// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK2: [[FOR_END]]:
// CHECK2-NEXT: ret void
@@ -1427,13 +1664,11 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[ENTRY:.*:]]
// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -1448,12 +1683,10 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
// CHECK2-NEXT: store i32 0, ptr [[K]], align 4
-// CHECK2-NEXT: store i32 63, ptr [[DOTOMP_UB1]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
// CHECK2-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4
@@ -1573,6 +1806,277 @@ extern "C" void foo4() {
// CHECK2-NEXT: ret void
//
//
+// CHECK2-LABEL: define dso_local void @foo5(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END267:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
+// CHECK2-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]]
+// CHECK2: [[COND_TRUE24]]:
+// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT: br label %[[COND_END26:.*]]
+// CHECK2: [[COND_FALSE25]]:
+// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: br label %[[COND_END26]]
+// CHECK2: [[COND_END26]]:
+// CHECK2-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ]
+// CHECK2-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128
+// CHECK2-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP21]])
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND30:.*]]
+// CHECK2: [[FOR_COND30]]:
+// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK2-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]]
+// CHECK2: [[FOR_BODY32]]:
+// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64
+// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]]
+// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]]
+// CHECK2-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
+// CHECK2-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1
+// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]]
+// CHECK2-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN41]]:
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]]
+// CHECK2-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2
+// CHECK2-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]]
+// CHECK2-NEXT: store i32 [[ADD45]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP37]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]]
+// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK2: [[IF_THEN47]]:
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]]
+// CHECK2-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]]
+// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1
+// CHECK2-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]]
+// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK2-NEXT: br label %[[IF_END52]]
+// CHECK2: [[IF_END52]]:
+// CHECK2-NEXT: br label %[[IF_END53]]
+// CHECK2: [[IF_END53]]:
+// CHECK2-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]]
+// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]]
+// CHECK2: [[IF_THEN55]]:
+// CHECK2-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]]
+// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]]
+// CHECK2-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1
+// CHECK2-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]]
+// CHECK2-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: store ptr [[TMP52]], ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]])
+// CHECK2-NEXT: br label %[[IF_END60]]
+// CHECK2: [[IF_END60]]:
+// CHECK2-NEXT: br label %[[FOR_INC61:.*]]
+// CHECK2: [[FOR_INC61]]:
+// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1
+// CHECK2-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK2: [[FOR_END63]]:
+// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8
+// CHECK2-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND70:.*]]
+// CHECK2: [[FOR_COND70]]:
+// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8
+// CHECK2-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]]
+// CHECK2-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]]
+// CHECK2: [[FOR_BODY72]]:
+// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]])
+// CHECK2-NEXT: br label %[[FOR_INC73:.*]]
+// CHECK2: [[FOR_INC73]]:
+// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1
+// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND70]]
+// CHECK2: [[FOR_END74]]:
+// CHECK2-NEXT: ret void
+//
+//
// CHECK2-LABEL: define dso_local void @tfoo2(
// CHECK2-SAME: ) #[[ATTR0]] {
// CHECK2-NEXT: [[ENTRY:.*:]]
@@ -1593,7 +2097,6 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
@@ -1602,7 +2105,6 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -1611,7 +2113,6 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4
@@ -1641,174 +2142,168 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1
// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
// CHECK2-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
-// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4
-// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
-// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]]
// CHECK2-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1
-// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]]
-// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]]
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]]
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]]
// CHECK2-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1
// CHECK2-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK2-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1
// CHECK2-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]]
// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
// CHECK2: [[COND_TRUE]]:
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
// CHECK2-NEXT: br label %[[COND_END:.*]]
// CHECK2: [[COND_FALSE]]:
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
// CHECK2-NEXT: br label %[[COND_END]]
// CHECK2: [[COND_END]]:
-// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ]
// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
-// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]]
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]]
// CHECK2-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
// CHECK2: [[COND_TRUE30]]:
-// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
// CHECK2-NEXT: br label %[[COND_END32:.*]]
// CHECK2: [[COND_FALSE31]]:
-// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
// CHECK2-NEXT: br label %[[COND_END32]]
// CHECK2: [[COND_END32]]:
-// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ]
+// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ]
// CHECK2-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK2-NEXT: br label %[[FOR_COND:.*]]
// CHECK2: [[FOR_COND]]:
-// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
-// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]]
// CHECK2-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
// CHECK2: [[FOR_BODY]]:
-// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]]
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]]
// CHECK2-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
// CHECK2: [[IF_THEN]]:
-// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]]
-// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]]
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]]
+// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]]
// CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
-// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]]
-// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]]
+// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]]
// CHECK2-NEXT: store i32 [[ADD38]], ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP49]])
// CHECK2-NEXT: br label %[[IF_END]]
// CHECK2: [[IF_END]]:
-// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]]
// CHECK2-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
// CHECK2: [[IF_THEN40]]:
-// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]]
-// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]]
+// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]]
// CHECK2-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]]
-// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]]
+// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]]
// CHECK2-NEXT: store i32 [[SUB44]], ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP61]])
+// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP58]])
// CHECK2-NEXT: br label %[[IF_END45]]
// CHECK2: [[IF_END45]]:
-// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
-// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]]
+// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]]
// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
// CHECK2: [[IF_THEN47]]:
-// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
-// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
-// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]]
-// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]]
+// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]]
+// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]]
// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
-// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
-// CHECK2-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]]
-// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]]
+// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]]
// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
-// CHECK2-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP70]])
+// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP67]])
// CHECK2-NEXT: br label %[[IF_END52]]
// CHECK2: [[IF_END52]]:
// CHECK2-NEXT: br label %[[FOR_INC:.*]]
// CHECK2: [[FOR_INC]]:
-// CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1
+// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK2: [[FOR_END]]:
// CHECK2-NEXT: ret void
//
@@ -1819,6 +2314,8 @@ extern "C" void foo4() {
// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
+// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]}
+// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]}
//.
// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"}
@@ -1826,4 +2323,6 @@ extern "C" void foo4() {
// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
+// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]}
+// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]}
//.
>From 823bc08b4ef97458665ed41409e03cd07598efd3 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:44:48 +0000
Subject: [PATCH 5/9] Fixed missing diagnostic groups in warnings
---
clang/include/clang/Basic/DiagnosticSemaKinds.td | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 191618e7865dc..a6ae0de004c8a 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11559,7 +11559,8 @@ def note_omp_implicit_dsa : Note<
def err_omp_loop_var_dsa : Error<
"loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">;
def warn_omp_different_loop_ind_var_types : Warning <
- "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">;
+ "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">,
+ InGroup<OpenMPLoopForm>;
def err_omp_not_canonical_loop : Error <
"loop after '#pragma omp %0' is not in canonical form">;
def err_omp_not_a_loop_sequence : Error <
@@ -11570,7 +11571,8 @@ def err_omp_invalid_looprange : Error <
"loop range in '#pragma omp %0' exceeds the number of available loops: "
"range end '%1' is greater than the total number of loops '%2'">;
def warn_omp_redundant_fusion : Warning <
- "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">;
+ "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">,
+ InGroup<OpenMPClauses>;
def err_omp_not_for : Error<
"%select{statement after '#pragma omp %1' must be a for loop|"
"expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">;
>From 422ffd7ef80a83156037a34c6ad955e67c504b4d Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:49:50 +0000
Subject: [PATCH 6/9] Fixed formatting and comments
---
clang/lib/Sema/SemaOpenMP.cpp | 112 ++++++++++++++++++----------------
1 file changed, 58 insertions(+), 54 deletions(-)
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index b0529c9352c83..485eebf23ef93 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -14160,42 +14160,43 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective(
}
// Overloaded base case function
-template <typename T, typename F>
-static bool tryHandleAs(T *t, F &&) {
- return false;
+template <typename T, typename F> static bool tryHandleAs(T *t, F &&) {
+ return false;
}
/**
- * Tries to recursively cast `t` to one of the given types and invokes `f` if successful.
+ * Tries to recursively cast `t` to one of the given types and invokes `f` if
+ * successful.
*
* @tparam Class The first type to check.
* @tparam Rest The remaining types to check.
* @tparam T The base type of `t`.
- * @tparam F The callable type for the function to invoke upon a successful cast.
+ * @tparam F The callable type for the function to invoke upon a successful
+ * cast.
* @param t The object to be checked.
* @param f The function to invoke if `t` matches `Class`.
* @return `true` if `t` matched any type and `f` was called, otherwise `false`.
*/
template <typename Class, typename... Rest, typename T, typename F>
static bool tryHandleAs(T *t, F &&f) {
- if (Class *c = dyn_cast<Class>(t)) {
- f(c);
- return true;
- } else {
- return tryHandleAs<Rest...>(t, std::forward<F>(f));
- }
+ if (Class *c = dyn_cast<Class>(t)) {
+ f(c);
+ return true;
+ } else {
+ return tryHandleAs<Rest...>(t, std::forward<F>(f));
+ }
}
// Updates OriginalInits by checking Transform against loop transformation
// directives and appending their pre-inits if a match is found.
static void updatePreInits(OMPLoopBasedDirective *Transform,
SmallVectorImpl<SmallVector<Stmt *, 0>> &PreInits) {
- if (!tryHandleAs<OMPTileDirective, OMPUnrollDirective, OMPReverseDirective,
- OMPInterchangeDirective, OMPFuseDirective>(
- Transform, [&PreInits](auto *Dir) {
- appendFlattenedStmtList(PreInits.back(), Dir->getPreInits());
- }))
- llvm_unreachable("Unhandled loop transformation");
+ if (!tryHandleAs<OMPTileDirective, OMPUnrollDirective, OMPReverseDirective,
+ OMPInterchangeDirective, OMPFuseDirective>(
+ Transform, [&PreInits](auto *Dir) {
+ appendFlattenedStmtList(PreInits.back(), Dir->getPreInits());
+ }))
+ llvm_unreachable("Unhandled loop transformation");
}
bool SemaOpenMP::checkTransformableLoopNest(
@@ -14273,43 +14274,42 @@ class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor {
unsigned getNestedLoopCount() const { return NestedLoopCount; }
bool VisitForStmt(ForStmt *FS) override {
- ++NestedLoopCount;
- return true;
+ ++NestedLoopCount;
+ return true;
}
bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override {
- ++NestedLoopCount;
- return true;
+ ++NestedLoopCount;
+ return true;
}
bool TraverseStmt(Stmt *S) override {
- if (!S)
+ if (!S)
return true;
- // Skip traversal of all expressions, including special cases like
- // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions
- // may contain inner statements (and even loops), but they are not part
- // of the syntactic body of the surrounding loop structure.
- // Therefore must not be counted
- if (isa<Expr>(S))
+ // Skip traversal of all expressions, including special cases like
+ // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions
+ // may contain inner statements (and even loops), but they are not part
+ // of the syntactic body of the surrounding loop structure.
+    // Therefore, they must not be counted.
+ if (isa<Expr>(S))
return true;
- // Only recurse into CompoundStmt (block {}) and loop bodies
- if (isa<CompoundStmt>(S) || isa<ForStmt>(S) ||
- isa<CXXForRangeStmt>(S)) {
+ // Only recurse into CompoundStmt (block {}) and loop bodies
+ if (isa<CompoundStmt>(S) || isa<ForStmt>(S) || isa<CXXForRangeStmt>(S)) {
return DynamicRecursiveASTVisitor::TraverseStmt(S);
- }
+ }
- // Stop traversal of the rest of statements, that break perfect
- // loop nesting, such as control flow (IfStmt, SwitchStmt...)
- return true;
+    // Do not traverse any other kind of statement: such statements break
+    // perfect loop nesting (e.g. control flow like IfStmt, SwitchStmt...)
+ return true;
}
bool TraverseDecl(Decl *D) override {
- // Stop in the case of finding a declaration, it is not important
- // in order to find nested loops (Possible CXXRecordDecl, RecordDecl,
- // FunctionDecl...)
- return true;
+    // Do not descend into declarations: they are irrelevant when looking
+    // for nested loops (e.g. CXXRecordDecl, RecordDecl,
+    // FunctionDecl...)
+ return true;
}
};
@@ -14467,15 +14467,14 @@ bool SemaOpenMP::analyzeLoopSequence(
return isa<OMPLoopTransformationDirective>(Child);
};
-
// High level grammar validation
for (auto *Child : LoopSeqStmt->children()) {
- if (!Child)
+ if (!Child)
continue;
- // Skip over non-loop-sequence statements
- if (!isLoopSequenceDerivation(Child)) {
+ // Skip over non-loop-sequence statements
+ if (!isLoopSequenceDerivation(Child)) {
Child = Child->IgnoreContainers();
// Ignore empty compound statement
@@ -14493,9 +14492,9 @@ bool SemaOpenMP::analyzeLoopSequence(
// Already been treated, skip this children
continue;
}
- }
- // Regular loop sequence handling
- if (isLoopSequenceDerivation(Child)) {
+ }
+ // Regular loop sequence handling
+ if (isLoopSequenceDerivation(Child)) {
if (isLoopGeneratingStmt(Child)) {
if (!analyzeLoopGeneration(Child)) {
return false;
@@ -14509,12 +14508,12 @@ bool SemaOpenMP::analyzeLoopSequence(
// Update the Loop Sequence size by one
++LoopSeqSize;
}
- } else {
+ } else {
// Report error for invalid statement inside canonical loop sequence
Diag(Child->getBeginLoc(), diag::err_omp_not_for)
<< 0 << getOpenMPDirectiveName(Kind);
return false;
- }
+ }
}
return true;
}
@@ -14531,9 +14530,9 @@ bool SemaOpenMP::checkTransformableLoopSequence(
// Checks whether the given statement is a compound statement
if (!isa<CompoundStmt>(AStmt)) {
- Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
- << getOpenMPDirectiveName(Kind);
- return false;
+ Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
+ << getOpenMPDirectiveName(Kind);
+ return false;
}
// Number of top level canonical loop nests observed (And acts as index)
LoopSeqSize = 0;
@@ -14564,7 +14563,7 @@ bool SemaOpenMP::checkTransformableLoopSequence(
OriginalInits, TransformsPreInits,
LoopSequencePreInits, LoopCategories, Context,
Kind)) {
- return false;
+ return false;
}
if (LoopSeqSize <= 0) {
Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence)
@@ -15278,7 +15277,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
Stmt *LoopStmt = nullptr;
collectLoopStmts(AStmt, {LoopStmt});
- // Determine the PreInit declarations.e
+ // Determine the PreInit declarations.
SmallVector<Stmt *, 4> PreInits;
addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits);
@@ -15894,13 +15893,18 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
CountVal = CountInt.getZExtValue();
};
- // Checks if the loop range is valid
+ // OpenMP [6.0, Restrictions]
+ // first + count - 1 must not evaluate to a value greater than the
+ // loop sequence length of the associated canonical loop sequence.
auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal,
unsigned NumLoops) -> bool {
return FirstVal + CountVal - 1 <= NumLoops;
};
uint64_t FirstVal = 1, CountVal = 0, LastVal = LoopSeqSize;
+ // Validates the loop range after evaluating the semantic information
+ // and ensures that the range is valid for the given loop sequence size.
+ // Expressions are evaluated at compile time to obtain constant values.
if (LRC) {
EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal,
CountVal);
>From ac0d9e348109f742440003945d278a9c26f56976 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:58:54 +0000
Subject: [PATCH 7/9] Added minimal changes to enable flang future
implementation
---
flang/include/flang/Parser/dump-parse-tree.h | 1 +
flang/include/flang/Parser/parse-tree.h | 9 +++++++++
flang/lib/Lower/OpenMP/Clauses.cpp | 5 +++++
flang/lib/Lower/OpenMP/Clauses.h | 1 +
flang/lib/Parser/openmp-parsers.cpp | 7 +++++++
flang/lib/Parser/unparse.cpp | 7 +++++++
flang/lib/Semantics/check-omp-structure.cpp | 9 +++++++++
llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 +
8 files changed, 40 insertions(+)
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index df9278697346f..c220c4dafb52f 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -609,6 +609,7 @@ class ParseTreeDumper {
NODE(OmpLinearClause, Modifier)
NODE(parser, OmpLinearModifier)
NODE_ENUM(OmpLinearModifier, Value)
+ NODE(parser, OmpLoopRangeClause)
NODE(parser, OmpStepComplexModifier)
NODE(parser, OmpStepSimpleModifier)
NODE(parser, OmpLoopDirective)
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index 254236b510544..be80141b49e2b 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4361,6 +4361,15 @@ struct OmpLinearClause {
std::tuple<OmpObjectList, MODIFIERS(), /*PostModified=*/bool> t;
};
+// Ref: [6.0:207-208]
+//
+// loop-range-clause ->
+// LOOPRANGE(first, count) // since 6.0
+struct OmpLoopRangeClause {
+ TUPLE_CLASS_BOILERPLATE(OmpLoopRangeClause);
+ std::tuple<ScalarIntConstantExpr, ScalarIntConstantExpr> t;
+};
+
// Ref: [4.5:216-219], [5.0:315-324], [5.1:347-355], [5.2:150-158]
//
// map-clause ->
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index f3088b18b77ff..ea535ab3adbe7 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -998,6 +998,11 @@ Link make(const parser::OmpClause::Link &inp,
return Link{/*List=*/makeObjects(inp.v, semaCtx)};
}
+LoopRange make(const parser::OmpClause::Looprange &inp,
+ semantics::SemanticsContext &semaCtx) {
+ llvm_unreachable("Unimplemented: looprange");
+}
+
Map make(const parser::OmpClause::Map &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpMapClause
diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h
index d7ab21d428e32..bda8571e65f23 100644
--- a/flang/lib/Lower/OpenMP/Clauses.h
+++ b/flang/lib/Lower/OpenMP/Clauses.h
@@ -239,6 +239,7 @@ using Initializer = tomp::clause::InitializerT<TypeTy, IdTy, ExprTy>;
using InReduction = tomp::clause::InReductionT<TypeTy, IdTy, ExprTy>;
using IsDevicePtr = tomp::clause::IsDevicePtrT<TypeTy, IdTy, ExprTy>;
using Lastprivate = tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy>;
+using LoopRange = tomp::clause::LoopRangeT<TypeTy, IdTy, ExprTy>;
using Linear = tomp::clause::LinearT<TypeTy, IdTy, ExprTy>;
using Link = tomp::clause::LinkT<TypeTy, IdTy, ExprTy>;
using Map = tomp::clause::MapT<TypeTy, IdTy, ExprTy>;
diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp
index 52d3a5844c969..393dbe8ada002 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -841,6 +841,11 @@ TYPE_PARSER(
maybe(":"_tok >> nonemptyList(Parser<OmpLinearClause::Modifier>{})),
/*PostModified=*/pure(true)))
+TYPE_PARSER(
+ construct<OmpLoopRangeClause>(scalarIntConstantExpr,
+ "," >> scalarIntConstantExpr)
+)
+
// OpenMPv5.2 12.5.2 detach-clause -> DETACH (event-handle)
TYPE_PARSER(construct<OmpDetachClause>(Parser<OmpObject>{}))
@@ -1010,6 +1015,8 @@ TYPE_PARSER( //
parenthesized(Parser<OmpLinearClause>{}))) ||
"LINK" >> construct<OmpClause>(construct<OmpClause::Link>(
parenthesized(Parser<OmpObjectList>{}))) ||
+ "LOOPRANGE" >> construct<OmpClause>(construct<OmpClause::Looprange>(
+ parenthesized(Parser<OmpLoopRangeClause>{}))) ||
"MAP" >> construct<OmpClause>(construct<OmpClause::Map>(
parenthesized(Parser<OmpMapClause>{}))) ||
"MATCH" >> construct<OmpClause>(construct<OmpClause::Match>(
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index a626888b7dfe5..00b5a8c0600e1 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2314,6 +2314,13 @@ class UnparseVisitor {
}
}
}
+ void Unparse(const OmpLoopRangeClause &x) {
+ Word("LOOPRANGE(");
+ Walk(std::get<0>(x.t));
+ Put(", ");
+ Walk(std::get<1>(x.t));
+ Put(")");
+ }
void Unparse(const OmpReductionClause &x) {
using Modifier = OmpReductionClause::Modifier;
Walk(std::get<std::optional<std::list<Modifier>>>(x.t), ": ");
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index 606014276e7ca..4af2b4909fcb6 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -3383,6 +3383,15 @@ CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Collapse, OMPC_collapse)
CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Safelen, OMPC_safelen)
CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Simdlen, OMPC_simdlen)
+void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) {
+ context_.Say(GetContext().clauseSource,
+ "LOOPRANGE clause is not implemented yet"_err_en_US,
+ ContextDirectiveAsFortran());
+}
+
+void OmpStructureChecker::Enter(const parser::OmpClause::FreeAgent &x) {
+ context_.Say(GetContext().clauseSource,
+ "FREE_AGENT clause is not implemented yet"_err_en_US,
// Restrictions specific to each clause are implemented apart from the
// generalized restrictions.
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index ae19385c022d0..3be758686c634 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -273,6 +273,7 @@ def OMPC_Link : Clause<"link"> {
}
def OMPC_LoopRange : Clause<"looprange"> {
let clangClass = "OMPLoopRangeClause";
+ let flangClass = "OmpLoopRangeClause";
}
def OMPC_Map : Clause<"map"> {
let clangClass = "OMPMapClause";
>From e6e00ae563e491968637e00d2a15a7272bc9d146 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Wed, 21 May 2025 13:14:22 +0000
Subject: [PATCH 8/9] Address basic PR feedback
---
clang/include/clang/AST/OpenMPClause.h | 93 ++++----
clang/include/clang/AST/StmtOpenMP.h | 3 +-
clang/include/clang/Sema/SemaOpenMP.h | 14 +-
clang/lib/AST/OpenMPClause.cpp | 17 +-
clang/lib/CodeGen/CGExpr.cpp | 5 +-
clang/lib/CodeGen/CodeGenFunction.h | 4 -
clang/lib/Sema/SemaOpenMP.cpp | 224 +++++++++-----------
flang/lib/Semantics/check-omp-structure.cpp | 3 -
8 files changed, 166 insertions(+), 197 deletions(-)
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index 8f937cdef9cd0..3df5133a17fb4 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -1153,82 +1153,73 @@ class OMPFullClause final : public OMPNoChildClause<llvm::omp::OMPC_full> {
/// for(int j = 0; j < 256; j+=2)
/// for(int k = 127; k >= 0; --k)
/// \endcode
-class OMPLoopRangeClause final : public OMPClause {
+class OMPLoopRangeClause final
+ : public OMPClause,
+ private llvm::TrailingObjects<OMPLoopRangeClause, Expr *> {
friend class OMPClauseReader;
-
- explicit OMPLoopRangeClause()
- : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {}
+ friend class llvm::TrailingObjects<OMPLoopRangeClause, Expr *>;
/// Location of '('
SourceLocation LParenLoc;
- /// Location of 'first'
- SourceLocation FirstLoc;
-
- /// Location of 'count'
- SourceLocation CountLoc;
-
- /// Expr associated with 'first' argument
- Expr *First = nullptr;
-
- /// Expr associated with 'count' argument
- Expr *Count = nullptr;
-
- /// Set 'first'
- void setFirst(Expr *First) { this->First = First; }
+ /// Location of first and count expressions
+ SourceLocation FirstLoc, CountLoc;
- /// Set 'count'
- void setCount(Expr *Count) { this->Count = Count; }
+ /// Number of looprange arguments (always 2: first, count)
+ unsigned NumArgs = 2;
- /// Set location of '('.
- void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
-
- /// Set location of 'first' argument
- void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; }
+ /// Set the argument expressions.
+ void setArgs(ArrayRef<Expr *> Args) {
+ assert(Args.size() == NumArgs && "Expected exactly 2 looprange arguments");
+ std::copy(Args.begin(), Args.end(), getTrailingObjects<Expr *>());
+ }
- /// Set location of 'count' argument
- void setCountLoc(SourceLocation Loc) { CountLoc = Loc; }
+ /// Build an empty clause for deserialization.
+ explicit OMPLoopRangeClause()
+ : OMPClause(llvm::omp::OMPC_looprange, {}, {}), NumArgs(2) {}
public:
- /// Build an AST node for a 'looprange' clause
- ///
- /// \param StartLoc Starting location of the clause.
- /// \param LParenLoc Location of '('.
- /// \param ModifierLoc Modifier location.
- /// \param
+ /// Build a 'looprange' clause AST node.
static OMPLoopRangeClause *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
SourceLocation FirstLoc, SourceLocation CountLoc,
- SourceLocation EndLoc, Expr *First, Expr *Count);
+ SourceLocation EndLoc, ArrayRef<Expr *> Args);
- /// Build an empty 'looprange' node for deserialization
- ///
- /// \param C Context of the AST.
+ /// Build an empty 'looprange' clause node.
static OMPLoopRangeClause *CreateEmpty(const ASTContext &C);
- /// Returns the location of '('
+ // Location getters/setters
SourceLocation getLParenLoc() const { return LParenLoc; }
-
- /// Returns the location of 'first'
SourceLocation getFirstLoc() const { return FirstLoc; }
-
- /// Returns the location of 'count'
SourceLocation getCountLoc() const { return CountLoc; }
- /// Returns the argument 'first' or nullptr if not set
- Expr *getFirst() const { return cast_or_null<Expr>(First); }
+ void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+ void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; }
+ void setCountLoc(SourceLocation Loc) { CountLoc = Loc; }
- /// Returns the argument 'count' or nullptr if not set
- Expr *getCount() const { return cast_or_null<Expr>(Count); }
+ /// Get looprange arguments: first and count
+ Expr *getFirst() const { return getArgs()[0]; }
+ Expr *getCount() const { return getArgs()[1]; }
- child_range children() {
- return child_range(reinterpret_cast<Stmt **>(&First),
- reinterpret_cast<Stmt **>(&Count) + 1);
+ /// Set looprange arguments: first and count
+ void setFirst(Expr *E) { getArgs()[0] = E; }
+ void setCount(Expr *E) { getArgs()[1] = E; }
+
+ MutableArrayRef<Expr *> getArgs() {
+ return MutableArrayRef<Expr *>(getTrailingObjects<Expr *>(), NumArgs);
+ }
+ ArrayRef<Expr *> getArgs() const {
+ return ArrayRef<Expr *>(getTrailingObjects<Expr *>(), NumArgs);
}
+ child_range children() {
+ return child_range(reinterpret_cast<Stmt **>(getArgs().begin()),
+ reinterpret_cast<Stmt **>(getArgs().end()));
+ }
const_child_range children() const {
- auto Children = const_cast<OMPLoopRangeClause *>(this)->children();
- return const_child_range(Children.begin(), Children.end());
+ auto AR = getArgs();
+ return const_child_range(reinterpret_cast<Stmt *const *>(AR.begin()),
+ reinterpret_cast<Stmt *const *>(AR.end()));
}
child_range used_children() {
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index b6a948a8c6020..cb871c9894d01 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -5807,7 +5807,6 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective {
llvm::omp::OMPD_reverse, StartLoc,
EndLoc, 1) {
// Reverse produces a single top-level canonical loop nest
- setNumGeneratedLoops(1);
setNumGeneratedLoopNests(1);
}
@@ -5878,7 +5877,7 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
EndLoc, NumLoops) {
// Interchange produces a single top-level canonical loop
// nest, with the exact same amount of total loops
- setNumGeneratedLoops(NumLoops);
+ setNumGeneratedLoops(3 * NumLoops);
setNumGeneratedLoopNests(1);
}
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index ac4cbe3709a0d..35bb884c0c1f2 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -1491,7 +1491,7 @@ class SemaOpenMP : public SemaBase {
bool checkTransformableLoopNest(
OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
- Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits);
+ Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits);
/// @brief Categories of loops encountered during semantic OpenMP loop
/// analysis
@@ -1554,9 +1554,9 @@ class SemaOpenMP : public SemaBase {
Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops,
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
SmallVectorImpl<Stmt *> &ForStmts,
- SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
- SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
- SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+ SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits,
+ SmallVectorImpl<SmallVector<Stmt *>> &TransformsPreInits,
+ SmallVectorImpl<SmallVector<Stmt *>> &LoopSequencePreInits,
SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context,
OpenMPDirectiveKind Kind);
@@ -1590,9 +1590,9 @@ class SemaOpenMP : public SemaBase {
unsigned &NumLoops,
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
SmallVectorImpl<Stmt *> &ForStmts,
- SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
- SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
- SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+ SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits,
+ SmallVectorImpl<SmallVector<Stmt *>> &TransformsPreInits,
+ SmallVectorImpl<SmallVector<Stmt *>> &LoopSequencePreInits,
SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context);
/// Helper to keep information about the current `omp begin/end declare
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 0b5808eb100e4..e0570262b2a05 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -1026,22 +1026,25 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) {
OMPLoopRangeClause *
OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc,
- SourceLocation LParenLoc, SourceLocation EndLoc,
- SourceLocation FirstLoc, SourceLocation CountLoc,
- Expr *First, Expr *Count) {
+ SourceLocation LParenLoc, SourceLocation FirstLoc,
+ SourceLocation CountLoc, SourceLocation EndLoc,
+ ArrayRef<Expr *> Args) {
+
+ assert(Args.size() == 2 &&
+ "looprange clause must have exactly two arguments");
OMPLoopRangeClause *Clause = CreateEmpty(C);
Clause->setLocStart(StartLoc);
Clause->setLParenLoc(LParenLoc);
- Clause->setLocEnd(EndLoc);
Clause->setFirstLoc(FirstLoc);
Clause->setCountLoc(CountLoc);
- Clause->setFirst(First);
- Clause->setCount(Count);
+ Clause->setLocEnd(EndLoc);
+ Clause->setArgs(Args);
return Clause;
}
OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) {
- return new (C) OMPLoopRangeClause();
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(2));
+ return new (Mem) OMPLoopRangeClause();
}
OMPAllocateClause *OMPAllocateClause::Create(
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 1671f07bc2760..268e4220b05b6 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3241,11 +3241,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
var, ConvertTypeForMem(VD->getType()), getContext().getDeclAlign(VD));
// No other cases for now.
- } else {
- llvm::dbgs() << "THE DAMN DECLREFEXPR HASN'T BEEN ENTERED IN LOCALDECLMAP\n";
- VD->dumpColor();
+ } else
llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?");
- }
// Handle threadlocal function locals.
if (VD->getTLSKind() != VarDecl::TLS_None)
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index ce00198c396b6..a983901f560de 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -5414,10 +5414,6 @@ class CodeGenFunction : public CodeGenTypeCache {
/// Set the address of a local variable.
void setAddrOfLocalVar(const VarDecl *VD, Address Addr) {
- if (LocalDeclMap.count(VD)) {
- llvm::errs() << "Warning: VarDecl already exists in map: ";
- VD->dumpColor();
- }
assert(!LocalDeclMap.count(VD) && "Decl already exists in LocalDeclMap!");
LocalDeclMap.insert({VD, Addr});
}
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 485eebf23ef93..d2da417e5cfde 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -14159,38 +14159,37 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective(
getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
}
-// Overloaded base case function
+/// Overloaded base case function
template <typename T, typename F> static bool tryHandleAs(T *t, F &&) {
return false;
}
-/**
- * Tries to recursively cast `t` to one of the given types and invokes `f` if
- * successful.
- *
- * @tparam Class The first type to check.
- * @tparam Rest The remaining types to check.
- * @tparam T The base type of `t`.
- * @tparam F The callable type for the function to invoke upon a successful
- * cast.
- * @param t The object to be checked.
- * @param f The function to invoke if `t` matches `Class`.
- * @return `true` if `t` matched any type and `f` was called, otherwise `false`.
- */
+///
+/// Tries to recursively cast `t` to one of the given types and invokes `f` if
+/// successful.
+///
+/// @tparam Class The first type to check.
+/// @tparam Rest The remaining types to check.
+/// @tparam T The base type of `t`.
+/// @tparam F The callable type for the function to invoke upon a successful
+/// cast.
+/// @param t The object to be checked.
+/// @param f The function to invoke if `t` matches `Class`.
+/// @return `true` if `t` matched any type and `f` was called, otherwise
+/// `false`.
template <typename Class, typename... Rest, typename T, typename F>
static bool tryHandleAs(T *t, F &&f) {
if (Class *c = dyn_cast<Class>(t)) {
f(c);
return true;
- } else {
- return tryHandleAs<Rest...>(t, std::forward<F>(f));
}
+ return tryHandleAs<Rest...>(t, std::forward<F>(f));
}
-// Updates OriginalInits by checking Transform against loop transformation
-// directives and appending their pre-inits if a match is found.
+/// Updates OriginalInits by checking Transform against loop transformation
+/// directives and appending their pre-inits if a match is found.
static void updatePreInits(OMPLoopBasedDirective *Transform,
- SmallVectorImpl<SmallVector<Stmt *, 0>> &PreInits) {
+ SmallVectorImpl<SmallVector<Stmt *>> &PreInits) {
if (!tryHandleAs<OMPTileDirective, OMPUnrollDirective, OMPReverseDirective,
OMPInterchangeDirective, OMPFuseDirective>(
Transform, [&PreInits](auto *Dir) {
@@ -14202,7 +14201,7 @@ static void updatePreInits(OMPLoopBasedDirective *Transform,
bool SemaOpenMP::checkTransformableLoopNest(
OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
- Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits) {
+ Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits) {
OriginalInits.emplace_back();
bool Result = OMPLoopBasedDirective::doForAllLoops(
AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false, NumLoops,
@@ -14236,40 +14235,40 @@ bool SemaOpenMP::checkTransformableLoopNest(
return Result;
}
-// Counts the total number of nested loops, including the outermost loop (the
-// original loop). PRECONDITION of this visitor is that it must be invoked from
-// the original loop to be analyzed. The traversal is stop for Decl's and
-// Expr's given that they may contain inner loops that must not be counted.
-//
-// Example AST structure for the code:
-//
-// int main() {
-// #pragma omp fuse
-// {
-// for (int i = 0; i < 100; i++) { <-- Outer loop
-// []() {
-// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP
-// };
-// for(int j = 0; j < 5; ++j) {} <-- Inner loop
-// }
-// for (int r = 0; i < 100; i++) { <-- Outer loop
-// struct LocalClass {
-// void bar() {
-// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP
-// }
-// };
-// for(int k = 0; k < 10; ++k) {} <-- Inner loop
-// {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP
-// }
-// }
-// }
-// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops
+/// Counts the total number of nested loops, including the outermost loop (the
+/// original loop). PRECONDITION of this visitor is that it must be invoked from
+/// the original loop to be analyzed. The traversal is stopped at Decls and
+/// Exprs, given that they may contain inner loops that must not be counted.
+///
+/// Example AST structure for the code:
+///
+/// int main() {
+/// #pragma omp fuse
+/// {
+/// for (int i = 0; i < 100; i++) { <-- Outer loop
+/// []() {
+/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP
+/// };
+/// for(int j = 0; j < 5; ++j) {} <-- Inner loop
+/// }
+/// for (int r = 0; r < 100; r++) { <-- Outer loop
+/// struct LocalClass {
+/// void bar() {
+/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP
+/// }
+/// };
+/// for(int k = 0; k < 10; ++k) {} <-- Inner loop
+/// {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP
+/// }
+/// }
+/// }
+/// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops
class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor {
private:
unsigned NestedLoopCount = 0;
public:
- explicit NestedLoopCounterVisitor() {}
+ explicit NestedLoopCounterVisitor() = default;
unsigned getNestedLoopCount() const { return NestedLoopCount; }
@@ -14296,7 +14295,7 @@ class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor {
return true;
// Only recurse into CompoundStmt (block {}) and loop bodies
- if (isa<CompoundStmt>(S) || isa<ForStmt>(S) || isa<CXXForRangeStmt>(S)) {
+ if (isa<CompoundStmt, ForStmt, CXXForRangeStmt>(S)) {
return DynamicRecursiveASTVisitor::TraverseStmt(S);
}
@@ -14317,19 +14316,18 @@ bool SemaOpenMP::analyzeLoopSequence(
Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops,
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
SmallVectorImpl<Stmt *> &ForStmts,
- SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
- SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
- SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+ SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits,
+ SmallVectorImpl<SmallVector<Stmt *>> &TransformsPreInits,
+ SmallVectorImpl<SmallVector<Stmt *>> &LoopSequencePreInits,
SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context,
OpenMPDirectiveKind Kind) {
VarsWithInheritedDSAType TmpDSA;
QualType BaseInductionVarType;
- // Helper Lambda to handle storing initialization and body statements for both
- // ForStmt and CXXForRangeStmt and checks for any possible mismatch between
- // induction variables types
- auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType,
- this, &Context](Stmt *LoopStmt) {
+ /// Helper Lambda to handle storing initialization and body statements for
+ /// both ForStmt and CXXForRangeStmt and checks for any possible mismatch
+ /// between induction variables types
+ auto StoreLoopStatements = [&](Stmt *LoopStmt) {
if (auto *For = dyn_cast<ForStmt>(LoopStmt)) {
OriginalInits.back().push_back(For->getInit());
ForStmts.push_back(For);
@@ -14357,16 +14355,11 @@ bool SemaOpenMP::analyzeLoopSequence(
}
};
- // Helper lambda functions to encapsulate the processing of different
- // derivations of the canonical loop sequence grammar
- //
- // Modularized code for handling loop generation and transformations
- auto analyzeLoopGeneration = [&storeLoopStatements, &LoopHelpers,
- &OriginalInits, &TransformsPreInits,
- &LoopCategories, &LoopSeqSize, &NumLoops, Kind,
- &TmpDSA, &ForStmts, &Context,
- &LoopSequencePreInits, this](Stmt *Child) {
- auto LoopTransform = dyn_cast<OMPLoopTransformationDirective>(Child);
+ /// Helper lambda functions to encapsulate the processing of different
+ /// derivations of the canonical loop sequence grammar
+ /// Modularized code for handling loop generation and transformations
+ auto AnalyzeLoopGeneration = [&](Stmt *Child) {
+ auto *LoopTransform = dyn_cast<OMPLoopTransformationDirective>(Child);
Stmt *TransformedStmt = LoopTransform->getTransformedStmt();
unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests();
unsigned NumGeneratedLoops = LoopTransform->getNumGeneratedLoops();
@@ -14377,9 +14370,8 @@ bool SemaOpenMP::analyzeLoopSequence(
LoopSeqSize += NumGeneratedLoopNests;
NumLoops += NumGeneratedLoops;
return true;
- }
- // Unroll full (0 loops produced)
- else {
+ } else {
+ // Unroll full (0 loops produced)
Diag(Child->getBeginLoc(), diag::err_omp_not_for)
<< 0 << getOpenMPDirectiveName(Kind);
return false;
@@ -14406,9 +14398,8 @@ bool SemaOpenMP::analyzeLoopSequence(
LoopHelpers, ForStmts, OriginalInits,
TransformsPreInits, LoopSequencePreInits,
LoopCategories, Context, Kind);
- }
- // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all)
- else {
+ } else {
+ // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all)
// Process the transformed loop statement
OriginalInits.emplace_back();
TransformsPreInits.emplace_back();
@@ -14424,7 +14415,7 @@ bool SemaOpenMP::analyzeLoopSequence(
<< getOpenMPDirectiveName(Kind);
return false;
}
- storeLoopStatements(TransformedStmt);
+ StoreLoopStatements(TransformedStmt);
updatePreInits(LoopTransform, TransformsPreInits);
NumLoops += NumGeneratedLoops;
@@ -14433,10 +14424,8 @@ bool SemaOpenMP::analyzeLoopSequence(
}
};
- // Modularized code for handling regular canonical loops
- auto analyzeRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits,
- &LoopSeqSize, &NumLoops, Kind, &TmpDSA,
- &LoopCategories, this](Stmt *Child) {
+ /// Modularized code for handling regular canonical loops
+ auto AnalyzeRegularLoop = [&](Stmt *Child) {
OriginalInits.emplace_back();
LoopHelpers.emplace_back();
LoopCategories.push_back(OMPLoopCategory::RegularLoop);
@@ -14451,19 +14440,19 @@ bool SemaOpenMP::analyzeLoopSequence(
return false;
}
- storeLoopStatements(Child);
+ StoreLoopStatements(Child);
auto NLCV = NestedLoopCounterVisitor();
NLCV.TraverseStmt(Child);
NumLoops += NLCV.getNestedLoopCount();
return true;
};
- // Helper functions to validate canonical loop sequence grammar is valid
- auto isLoopSequenceDerivation = [](auto *Child) {
- return isa<ForStmt>(Child) || isa<CXXForRangeStmt>(Child) ||
- isa<OMPLoopTransformationDirective>(Child);
+ /// Helper functions to validate loop sequence grammar derivations
+ auto IsLoopSequenceDerivation = [](auto *Child) {
+ return isa<ForStmt, CXXForRangeStmt, OMPLoopTransformationDirective>(Child);
};
- auto isLoopGeneratingStmt = [](auto *Child) {
+ /// Helper functions to validate loop generating grammar derivations
+ auto IsLoopGeneratingStmt = [](auto *Child) {
return isa<OMPLoopTransformationDirective>(Child);
};
@@ -14474,7 +14463,7 @@ bool SemaOpenMP::analyzeLoopSequence(
continue;
// Skip over non-loop-sequence statements
- if (!isLoopSequenceDerivation(Child)) {
+ if (!IsLoopSequenceDerivation(Child)) {
Child = Child->IgnoreContainers();
// Ignore empty compound statement
@@ -14494,17 +14483,17 @@ bool SemaOpenMP::analyzeLoopSequence(
}
}
// Regular loop sequence handling
- if (isLoopSequenceDerivation(Child)) {
- if (isLoopGeneratingStmt(Child)) {
- if (!analyzeLoopGeneration(Child)) {
+ if (IsLoopSequenceDerivation(Child)) {
+ if (IsLoopGeneratingStmt(Child)) {
+ if (!AnalyzeLoopGeneration(Child))
return false;
- }
- // analyzeLoopGeneration updates Loop Sequence size accordingly
+
+ // AnalyzeLoopGeneration updates Loop Sequence size accordingly
} else {
- if (!analyzeRegularLoop(Child)) {
+ if (!AnalyzeRegularLoop(Child))
return false;
- }
+
// Update the Loop Sequence size by one
++LoopSeqSize;
}
@@ -14523,9 +14512,9 @@ bool SemaOpenMP::checkTransformableLoopSequence(
unsigned &NumLoops,
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
SmallVectorImpl<Stmt *> &ForStmts,
- SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
- SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
- SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+ SmallVectorImpl<SmallVector<Stmt *>> &OriginalInits,
+ SmallVectorImpl<SmallVector<Stmt *>> &TransformsPreInits,
+ SmallVectorImpl<SmallVector<Stmt *>> &LoopSequencePreInits,
SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context) {
// Checks whether the given statement is a compound statement
@@ -14561,10 +14550,9 @@ bool SemaOpenMP::checkTransformableLoopSequence(
// Recursive entry point to process the main loop sequence
if (!analyzeLoopSequence(AStmt, LoopSeqSize, NumLoops, LoopHelpers, ForStmts,
OriginalInits, TransformsPreInits,
- LoopSequencePreInits, LoopCategories, Context,
- Kind)) {
+ LoopSequencePreInits, LoopCategories, Context, Kind))
return false;
- }
+
if (LoopSeqSize <= 0) {
Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence)
<< getOpenMPDirectiveName(Kind);
@@ -14656,7 +14644,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
// Verify and diagnose loop nest.
SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
Stmt *Body = nullptr;
- SmallVector<SmallVector<Stmt *, 0>, 4> OriginalInits;
+ SmallVector<SmallVector<Stmt *>, 4> OriginalInits;
if (!checkTransformableLoopNest(OMPD_tile, AStmt, NumLoops, LoopHelpers, Body,
OriginalInits))
return StmtError();
@@ -14933,7 +14921,7 @@ StmtResult SemaOpenMP::ActOnOpenMPStripeDirective(ArrayRef<OMPClause *> Clauses,
// Verify and diagnose loop nest.
SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
Stmt *Body = nullptr;
- SmallVector<SmallVector<Stmt *, 0>, 4> OriginalInits;
+ SmallVector<SmallVector<Stmt *>, 4> OriginalInits;
if (!checkTransformableLoopNest(OMPD_stripe, AStmt, NumLoops, LoopHelpers,
Body, OriginalInits))
return StmtError();
@@ -15194,7 +15182,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
Stmt *Body = nullptr;
SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers(
NumLoops);
- SmallVector<SmallVector<Stmt *, 0>, NumLoops + 1> OriginalInits;
+ SmallVector<SmallVector<Stmt *>, NumLoops + 1> OriginalInits;
if (!checkTransformableLoopNest(OMPD_unroll, AStmt, NumLoops, LoopHelpers,
Body, OriginalInits))
return StmtError();
@@ -15462,7 +15450,7 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt,
Stmt *Body = nullptr;
SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers(
NumLoops);
- SmallVector<SmallVector<Stmt *, 0>, NumLoops + 1> OriginalInits;
+ SmallVector<SmallVector<Stmt *>, NumLoops + 1> OriginalInits;
if (!checkTransformableLoopNest(OMPD_reverse, AStmt, NumLoops, LoopHelpers,
Body, OriginalInits))
return StmtError();
@@ -15654,7 +15642,7 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective(
// Verify and diagnose loop nest.
SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
Stmt *Body = nullptr;
- SmallVector<SmallVector<Stmt *, 0>, 2> OriginalInits;
+ SmallVector<SmallVector<Stmt *>, 2> OriginalInits;
if (!checkTransformableLoopNest(OMPD_interchange, AStmt, NumLoops,
LoopHelpers, Body, OriginalInits))
return StmtError();
@@ -15841,9 +15829,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
CaptureVars CopyTransformer(SemaRef);
// Ensure the structured block is not empty
- if (!AStmt) {
+ if (!AStmt)
return StmtError();
- }
unsigned NumLoops = 1;
unsigned LoopSeqSize = 1;
@@ -15862,16 +15849,15 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
// Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops
SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers;
SmallVector<Stmt *> LoopStmts;
- SmallVector<SmallVector<Stmt *, 0>> OriginalInits;
- SmallVector<SmallVector<Stmt *, 0>> TransformsPreInits;
- SmallVector<SmallVector<Stmt *, 0>> LoopSequencePreInits;
+ SmallVector<SmallVector<Stmt *>> OriginalInits;
+ SmallVector<SmallVector<Stmt *>> TransformsPreInits;
+ SmallVector<SmallVector<Stmt *>> LoopSequencePreInits;
SmallVector<OMPLoopCategory, 0> LoopCategories;
if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops,
LoopHelpers, LoopStmts, OriginalInits,
TransformsPreInits, LoopSequencePreInits,
- LoopCategories, Context)) {
+ LoopCategories, Context))
return StmtError();
- }
// Handle clauses, which can be any of the following: [looprange, apply]
const OMPLoopRangeClause *LRC =
@@ -15961,9 +15947,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
// expressions. Generates both the variable declaration and the corresponding
// initialization statement.
auto CreateHelperVarAndStmt =
- [&SemaRef = this->SemaRef, &Context, &CopyTransformer,
- &IVType](Expr *ExprToCopy, const std::string &BaseName, unsigned I,
- bool NeedsNewVD = false) {
+ [&, &SemaRef = SemaRef](Expr *ExprToCopy, const std::string &BaseName,
+ unsigned I, bool NeedsNewVD = false) {
Expr *TransformedExpr =
AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy));
if (!TransformedExpr)
@@ -16007,9 +15992,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
// Transformations that apply this concept: Loopranged Fuse, Split
if (!LoopSequencePreInits.empty()) {
for (const auto &LTPreInits : LoopSequencePreInits) {
- if (!LTPreInits.empty()) {
+ if (!LTPreInits.empty())
llvm::append_range(PreInits, LTPreInits);
- }
}
}
@@ -16038,9 +16022,9 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
// Order matters: pre-inits may define variables used in the original
// inits such as upper bounds...
auto TransformPreInit = TransformsPreInits[TransformIndex++];
- if (!TransformPreInit.empty()) {
+ if (!TransformPreInit.empty())
llvm::append_range(PreInits, TransformPreInit);
- }
+
addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
PreInits);
}
@@ -17459,13 +17443,15 @@ OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause(
if (CountVal.isInvalid())
Count = nullptr;
+ SmallVector<Expr *, 2> ArgsVec = {First, Count};
+
// OpenMP [6.0, Restrictions]
// first + count - 1 must not evaluate to a value greater than the
// loop sequence length of the associated canonical loop sequence.
// This check must be performed afterwards due to the delayed
// parsing and computation of the associated loop sequence
return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc,
- FirstLoc, CountLoc, EndLoc, First, Count);
+ FirstLoc, CountLoc, EndLoc, ArgsVec);
}
OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc,
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index 4af2b4909fcb6..ad4f54e6fdcc5 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -3389,9 +3389,6 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) {
ContextDirectiveAsFortran());
}
-void OmpStructureChecker::Enter(const parser::OmpClause::FreeAgent &x) {
- context_.Say(GetContext().clauseSource,
- "FREE_AGENT clause is not implemented yet"_err_en_US,
// Restrictions specific to each clause are implemented apart from the
// generalized restrictions.
>From 4100dfe4dd04ed1c953ea4e38a65e867c8e9f73f Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Thu, 22 May 2025 10:39:39 +0000
Subject: [PATCH 9/9] Removed unnecessary warning and updated tests accordingly
---
.../clang/Basic/DiagnosticSemaKinds.td | 3 --
clang/lib/Sema/SemaOpenMP.cpp | 21 +--------
clang/test/OpenMP/fuse_messages.cpp | 43 +++++++++++++++----
3 files changed, 35 insertions(+), 32 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index a6ae0de004c8a..d1790cea6cc45 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11558,9 +11558,6 @@ def note_omp_implicit_dsa : Note<
"implicitly determined as %0">;
def err_omp_loop_var_dsa : Error<
"loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">;
-def warn_omp_different_loop_ind_var_types : Warning <
- "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">,
- InGroup<OpenMPLoopForm>;
def err_omp_not_canonical_loop : Error <
"loop after '#pragma omp %0' is not in canonical form">;
def err_omp_not_a_loop_sequence : Error <
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index d2da417e5cfde..76484b577f9c1 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -14323,31 +14323,12 @@ bool SemaOpenMP::analyzeLoopSequence(
OpenMPDirectiveKind Kind) {
VarsWithInheritedDSAType TmpDSA;
- QualType BaseInductionVarType;
/// Helper Lambda to handle storing initialization and body statements for
- /// both ForStmt and CXXForRangeStmt and checks for any possible mismatch
- /// between induction variables types
+ /// both ForStmt and CXXForRangeStmt
auto StoreLoopStatements = [&](Stmt *LoopStmt) {
if (auto *For = dyn_cast<ForStmt>(LoopStmt)) {
OriginalInits.back().push_back(For->getInit());
ForStmts.push_back(For);
- // Extract induction variable
- if (auto *InitStmt = dyn_cast_or_null<DeclStmt>(For->getInit())) {
- if (auto *InitDecl = dyn_cast<VarDecl>(InitStmt->getSingleDecl())) {
- QualType InductionVarType = InitDecl->getType().getCanonicalType();
-
- // Compare with first loop type
- if (BaseInductionVarType.isNull()) {
- BaseInductionVarType = InductionVarType;
- } else if (!Context.hasSameType(BaseInductionVarType,
- InductionVarType)) {
- Diag(InitDecl->getBeginLoc(),
- diag::warn_omp_different_loop_ind_var_types)
- << getOpenMPDirectiveName(OMPD_fuse) << BaseInductionVarType
- << InductionVarType;
- }
- }
- }
} else {
auto *CXXFor = cast<CXXForRangeStmt>(LoopStmt);
OriginalInits.back().push_back(CXXFor->getBeginStmt());
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp
index 2a2491d008a0b..4902d424373e5 100644
--- a/clang/test/OpenMP/fuse_messages.cpp
+++ b/clang/test/OpenMP/fuse_messages.cpp
@@ -70,15 +70,6 @@ void func() {
for(int j = 0; j < 10; ++j);
}
- //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'unsigned int'}}
- //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'long long'}}
- #pragma omp fuse
- {
- for(int i = 0; i < 10; ++i);
- for(unsigned int j = 0; j < 10; ++j);
- for(long long k = 0; k < 100; ++k);
- }
-
//expected-warning@+2 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}}
#pragma omp fuse
{
@@ -123,6 +114,40 @@ void func() {
for(int j = 0; j < 100; ++j);
for(int k = 0; k < 50; ++k);
}
+
+ //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '6' is greater than the total number of loops '5'}}
+ #pragma omp fuse looprange(1,6)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ // This fusion results in 2 loops
+ #pragma omp fuse looprange(1,2)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+ }
+
+ //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than the total number of loops '3'}}
+ #pragma omp fuse looprange(2,3)
+ {
+ #pragma omp unroll partial(2)
+ for(int i = 0; i < 10; ++i);
+
+ #pragma omp reverse
+ for(int j = 0; j < 10; ++j);
+
+ #pragma omp fuse
+ {
+ {
+ #pragma omp reverse
+ for(int j = 0; j < 10; ++j);
+ }
+ for(int k = 0; k < 50; ++k);
+ }
+ }
}
// In a template context, but expression itself not instantiation-dependent
More information about the cfe-commits
mailing list