[clang] [flang] [llvm] [openmp] [Clang][OpenMP][LoopTransformations] Add support for "#pragma omp fuse" and clause LoopRange (PR #139293)
Walter J.T.V via llvm-commits
llvm-commits at lists.llvm.org
Fri May 9 10:05:55 PDT 2025
https://github.com/eZWALT created https://github.com/llvm/llvm-project/pull/139293
This pull request introduces full support for the #pragma omp fuse directive, as specified in the OpenMP 6.0 specification, along with initial support for the looprange clause in Clang.
To enable this functionality, infrastructure for the Loop Sequence construct, also new in OpenMP 6.0, has been implemented. Additionally, a minimal code skeleton has been added to Flang to ensure compatibility and avoid integration issues, although a full implementation in Flang is still pending.
https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-6-0.pdf
P.S. As a follow-up to this loop transformation work, I'm currently preparing a patch that implements the "#pragma omp split" directive, also introduced in OpenMP 6.0.
>From 5e01792a04a20dfc76097081ac1cf3da71bc97b6 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:25:33 +0000
Subject: [PATCH 1/7] Add fuse directive patch
---
clang/include/clang-c/Index.h | 4 +
clang/include/clang/AST/RecursiveASTVisitor.h | 3 +
clang/include/clang/AST/StmtOpenMP.h | 105 +-
.../clang/Basic/DiagnosticSemaKinds.td | 8 +
clang/include/clang/Basic/StmtNodes.td | 1 +
clang/include/clang/Sema/SemaOpenMP.h | 27 +
.../include/clang/Serialization/ASTBitCodes.h | 1 +
clang/lib/AST/StmtOpenMP.cpp | 25 +
clang/lib/AST/StmtPrinter.cpp | 5 +
clang/lib/AST/StmtProfile.cpp | 4 +
clang/lib/Basic/OpenMPKinds.cpp | 2 +-
clang/lib/CodeGen/CGStmt.cpp | 3 +
clang/lib/CodeGen/CGStmtOpenMP.cpp | 8 +
clang/lib/CodeGen/CodeGenFunction.h | 1 +
clang/lib/Sema/SemaExceptionSpec.cpp | 1 +
clang/lib/Sema/SemaOpenMP.cpp | 600 +++++++
clang/lib/Sema/TreeTransform.h | 11 +
clang/lib/Serialization/ASTReaderStmt.cpp | 11 +
clang/lib/Serialization/ASTWriterStmt.cpp | 6 +
clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 1 +
clang/test/OpenMP/fuse_ast_print.cpp | 278 +++
clang/test/OpenMP/fuse_codegen.cpp | 1511 +++++++++++++++++
clang/test/OpenMP/fuse_messages.cpp | 76 +
clang/tools/libclang/CIndex.cpp | 7 +
clang/tools/libclang/CXCursor.cpp | 3 +
llvm/include/llvm/Frontend/OpenMP/OMP.td | 4 +
.../runtime/test/transform/fuse/foreach.cpp | 192 +++
openmp/runtime/test/transform/fuse/intfor.c | 50 +
.../runtime/test/transform/fuse/iterfor.cpp | 194 +++
.../fuse/parallel-wsloop-collapse-foreach.cpp | 208 +++
.../fuse/parallel-wsloop-collapse-intfor.c | 45 +
31 files changed, 3391 insertions(+), 4 deletions(-)
create mode 100644 clang/test/OpenMP/fuse_ast_print.cpp
create mode 100644 clang/test/OpenMP/fuse_codegen.cpp
create mode 100644 clang/test/OpenMP/fuse_messages.cpp
create mode 100644 openmp/runtime/test/transform/fuse/foreach.cpp
create mode 100644 openmp/runtime/test/transform/fuse/intfor.c
create mode 100644 openmp/runtime/test/transform/fuse/iterfor.cpp
create mode 100644 openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp
create mode 100644 openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c
diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
index d30d15e53802a..00046de62a742 100644
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -2162,6 +2162,10 @@ enum CXCursorKind {
*/
CXCursor_OMPStripeDirective = 310,
+ /** OpenMP fuse directive
+ */
+ CXCursor_OMPFuseDirective = 318,
+
/** OpenACC Compute Construct.
*/
CXCursor_OpenACCComputeConstruct = 320,
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 3edc8684d0a19..e712a47f1639c 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3078,6 +3078,9 @@ DEF_TRAVERSE_STMT(OMPUnrollDirective,
DEF_TRAVERSE_STMT(OMPReverseDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
+DEF_TRAVERSE_STMT(OMPFuseDirective,
+ { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
DEF_TRAVERSE_STMT(OMPInterchangeDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 736bcabbad1f7..dc6f797e24ab8 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -962,6 +962,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
/// Number of loops generated by this loop transformation.
unsigned NumGeneratedLoops = 0;
+ /// Number of top level canonical loop nests generated by this loop
+ /// transformation
+ unsigned NumGeneratedLoopNests = 0;
protected:
explicit OMPLoopTransformationDirective(StmtClass SC,
@@ -973,6 +976,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
/// Set the number of loops generated by this loop transformation.
void setNumGeneratedLoops(unsigned Num) { NumGeneratedLoops = Num; }
+ /// Set the number of top level canonical loop nests generated by this loop
+ /// transformation
+ void setNumGeneratedLoopNests(unsigned Num) { NumGeneratedLoopNests = Num; }
public:
/// Return the number of associated (consumed) loops.
@@ -981,6 +987,10 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
/// Return the number of loops generated by this loop transformation.
unsigned getNumGeneratedLoops() const { return NumGeneratedLoops; }
+ /// Return the number of top level canonical loop nests generated by this loop
+ /// transformation
+ unsigned getNumGeneratedLoopNests() const { return NumGeneratedLoopNests; }
+
/// Get the de-sugared statements after the loop transformation.
///
/// Might be nullptr if either the directive generates no loops and is handled
@@ -995,7 +1005,8 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
Stmt::StmtClass C = T->getStmtClass();
return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass ||
C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass ||
- C == OMPStripeDirectiveClass;
+ C == OMPStripeDirectiveClass ||
+ C == OMPFuseDirectiveClass;
}
};
@@ -5562,6 +5573,7 @@ class OMPTileDirective final : public OMPLoopTransformationDirective {
llvm::omp::OMPD_tile, StartLoc, EndLoc,
NumLoops) {
setNumGeneratedLoops(2 * NumLoops);
+ setNumGeneratedLoopNests(1);
}
void setPreInits(Stmt *PreInits) {
@@ -5790,7 +5802,11 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective {
explicit OMPReverseDirective(SourceLocation StartLoc, SourceLocation EndLoc)
: OMPLoopTransformationDirective(OMPReverseDirectiveClass,
llvm::omp::OMPD_reverse, StartLoc,
- EndLoc, 1) {}
+ EndLoc, 1) {
+
+ setNumGeneratedLoopNests(1);
+ setNumGeneratedLoops(1);
+ }
void setPreInits(Stmt *PreInits) {
Data->getChildren()[PreInitsOffset] = PreInits;
@@ -5857,7 +5873,8 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
: OMPLoopTransformationDirective(OMPInterchangeDirectiveClass,
llvm::omp::OMPD_interchange, StartLoc,
EndLoc, NumLoops) {
- setNumGeneratedLoops(3 * NumLoops);
+ setNumGeneratedLoops(NumLoops);
+ setNumGeneratedLoopNests(1);
}
void setPreInits(Stmt *PreInits) {
@@ -5908,6 +5925,88 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
}
};
+/// Represents the '#pragma omp fuse' loop transformation directive.
+///
+/// \code{c}
+/// #pragma omp fuse
+/// {
+/// for(int i = 0; i < m1; ++i) {...}
+/// for(int j = 0; j < m2; ++j) {...}
+/// ...
+/// }
+/// \endcode
+
+class OMPFuseDirective final : public OMPLoopTransformationDirective {
+ friend class ASTStmtReader;
+ friend class OMPExecutableDirective;
+
+ // Offsets of the child statements (pre-inits first, transformed stmt second).
+ enum {
+ PreInitsOffset = 0,
+ TransformedStmtOffset,
+ };
+
+ explicit OMPFuseDirective(SourceLocation StartLoc, SourceLocation EndLoc,
+ unsigned NumLoops)
+ : OMPLoopTransformationDirective(OMPFuseDirectiveClass,
+ llvm::omp::OMPD_fuse, StartLoc, EndLoc,
+ NumLoops) {
+ setNumGeneratedLoops(1);
+ // TODO: Revisit this default of 1 once the looprange clause is implemented.
+ setNumGeneratedLoopNests(1);
+ }
+
+ void setPreInits(Stmt *PreInits) {
+ Data->getChildren()[PreInitsOffset] = PreInits;
+ }
+
+ void setTransformedStmt(Stmt *S) {
+ Data->getChildren()[TransformedStmtOffset] = S;
+ }
+
+public:
+ /// Create a new AST node representation for '#pragma omp fuse'.
+ ///
+ /// \param C Context of the AST
+ /// \param StartLoc Location of the introducer (e.g. the 'omp' token)
+ /// \param EndLoc Location of the directive's end (e.g. the tok::eod)
+ /// \param Clauses The directive's clauses
+ /// \param NumLoops Number of total affected loops
+ /// \param NumLoopNests Number of top level canonical loop nests, stored as
+ /// the generated loop nest count ('looprange' is not handled yet)
+ /// \param AssociatedStmt The outermost associated loop
+ /// \param TransformedStmt The loop nest after fusion, or nullptr in
+ /// dependent contexts
+ /// \param PreInits Helper preinits statements for the loop nest
+ static OMPFuseDirective *Create(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ ArrayRef<OMPClause *> Clauses,
+ unsigned NumLoops, unsigned NumLoopNests,
+ Stmt *AssociatedStmt, Stmt *TransformedStmt,
+ Stmt *PreInits);
+
+ /// Build an empty '#pragma omp fuse' AST node for deserialization.
+ ///
+ /// \param C Context of the AST
+ /// \param NumClauses Number of clauses to allocate
+ /// \param NumLoops Number of associated loops to allocate
+ static OMPFuseDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses,
+ unsigned NumLoops);
+
+ /// Gets the associated loops after the transformation. This is the de-sugared
+ /// replacement or nullptr in dependent contexts.
+ Stmt *getTransformedStmt() const {
+ return Data->getChildren()[TransformedStmtOffset];
+ }
+
+ /// Return the preinits statement stored in the PreInitsOffset child slot.
+ Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; }
+
+ static bool classof(const Stmt *T) {
+ return T->getStmtClass() == OMPFuseDirectiveClass;
+ }
+};
+
/// This represents '#pragma omp scan' directive.
///
/// \code
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e1b9ed0647bb9..640db20f82e0b 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11516,6 +11516,14 @@ def note_omp_implicit_dsa : Note<
"implicitly determined as %0">;
def err_omp_loop_var_dsa : Error<
"loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">;
+def warn_omp_different_loop_ind_var_types : Warning <
+ "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">;
+def err_omp_not_canonical_loop : Error <
+ "loop after '#pragma omp %0' is not in canonical form">;
+def err_omp_not_a_loop_sequence : Error <
+ "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">;
+def err_omp_empty_loop_sequence : Error <
+ "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">;
def err_omp_not_for : Error<
"%select{statement after '#pragma omp %1' must be a for loop|"
"expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">;
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index 9526fa5808aa5..739160342062c 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -234,6 +234,7 @@ def OMPStripeDirective : StmtNode<OMPLoopTransformationDirective>;
def OMPUnrollDirective : StmtNode<OMPLoopTransformationDirective>;
def OMPReverseDirective : StmtNode<OMPLoopTransformationDirective>;
def OMPInterchangeDirective : StmtNode<OMPLoopTransformationDirective>;
+def OMPFuseDirective : StmtNode<OMPLoopTransformationDirective>;
def OMPForDirective : StmtNode<OMPLoopDirective>;
def OMPForSimdDirective : StmtNode<OMPLoopDirective>;
def OMPSectionsDirective : StmtNode<OMPExecutableDirective>;
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index 6498390fe96f7..8d78c2197c89d 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -457,6 +457,13 @@ class SemaOpenMP : public SemaBase {
Stmt *AStmt,
SourceLocation StartLoc,
SourceLocation EndLoc);
+
+ /// Called on well-formed '#pragma omp fuse' after parsing of its
+ /// clauses and the associated statement.
+ StmtResult ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
+ Stmt *AStmt, SourceLocation StartLoc,
+ SourceLocation EndLoc);
+
/// Called on well-formed '\#pragma omp for' after parsing
/// of the associated statement.
StmtResult
@@ -1480,6 +1487,26 @@ class SemaOpenMP : public SemaBase {
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits);
+ /// Analyzes and checks a loop sequence for use by a loop transformation.
+ ///
+ /// \param Kind The loop transformation directive kind.
+ /// \param AStmt The statement associated with the directive; it is
+ /// checked against the canonical loop sequence structure.
+ /// \param LoopSeqSize [out] Number of top level canonical loop nests.
+ /// \param NumLoops [out] Total number of canonical loops.
+ /// \param LoopHelpers [out] The loop analysis results, one entry per
+ /// analyzed top level loop.
+ /// \param ForStmts [out] The statement of each analyzed top level loop.
+ /// \param OriginalInits [out] For each analyzed top level loop, the
+ /// collection of statements and declarations that must
+ /// have been executed/declared before entering the loop.
+ /// \param Context The AST context, used to compare induction variable types.
+ /// \return True if the loop sequence is valid, false if an error was
+ /// diagnosed.
+ bool checkTransformableLoopSequence(
+ OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize,
+ unsigned &NumLoops,
+ SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+ SmallVectorImpl<Stmt *> &ForStmts,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
+ ASTContext &Context);
+
/// Helper to keep information about the current `omp begin/end declare
/// variant` nesting.
struct OMPDeclareVariantScope {
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 5cb9998126a85..8fe9d8248d66f 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1948,6 +1948,7 @@ enum StmtCode {
STMT_OMP_UNROLL_DIRECTIVE,
STMT_OMP_REVERSE_DIRECTIVE,
STMT_OMP_INTERCHANGE_DIRECTIVE,
+ STMT_OMP_FUSE_DIRECTIVE,
STMT_OMP_FOR_DIRECTIVE,
STMT_OMP_FOR_SIMD_DIRECTIVE,
STMT_OMP_SECTIONS_DIRECTIVE,
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 4f8b50e179e30..f050e9063f1fc 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -456,6 +456,8 @@ OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc,
auto *Dir = createDirective<OMPUnrollDirective>(
C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc);
Dir->setNumGeneratedLoops(NumGeneratedLoops);
+ // The number of generated loops and loop nests during unroll matches
+ Dir->setNumGeneratedLoopNests(NumGeneratedLoops);
Dir->setTransformedStmt(TransformedStmt);
Dir->setPreInits(PreInits);
return Dir;
@@ -505,6 +507,29 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
SourceLocation(), SourceLocation(), NumLoops);
}
+OMPFuseDirective *OMPFuseDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ ArrayRef<OMPClause *> Clauses, unsigned NumLoops, unsigned NumLoopNests,
+ Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits) {
+ // Build the node with TransformedStmtOffset + 1 extra children (presumably the pre-inits and transformed-statement slots), then fill them in.
+ OMPFuseDirective *Dir = createDirective<OMPFuseDirective>(
+ C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc,
+ NumLoops);
+ Dir->setTransformedStmt(TransformedStmt);
+ Dir->setPreInits(PreInits);
+ Dir->setNumGeneratedLoopNests(NumLoopNests); // replaces the 1 assigned by the OMPFuseDirective constructor
+ Dir->setNumGeneratedLoops(NumLoops); // likewise replaces the constructor's 1
+ return Dir;
+}
+
+OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C,
+ unsigned NumClauses,
+ unsigned NumLoops) { // empty node for deserialization; locations are default-constructed
+ return createEmptyDirective<OMPFuseDirective>(
+ C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1,
+ SourceLocation(), SourceLocation(), NumLoops); // same extra-children count as Create()
+}
+
OMPForSimdDirective *
OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
SourceLocation EndLoc, unsigned CollapsedNum,
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index c6c49c6c1ba4d..ec0becea8f55c 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -789,6 +789,11 @@ void StmtPrinter::VisitOMPInterchangeDirective(OMPInterchangeDirective *Node) {
PrintOMPExecutableDirective(Node);
}
+void StmtPrinter::VisitOMPFuseDirective(OMPFuseDirective *Node) {
+ Indent() << "#pragma omp fuse"; // directive spelling, at the current indentation
+ PrintOMPExecutableDirective(Node); // shared helper; presumably prints clauses and the associated statement
+}
+
void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) {
Indent() << "#pragma omp for";
PrintOMPExecutableDirective(Node);
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 83d54da9be7e5..933ad19b7a8ef 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -1026,6 +1026,10 @@ void StmtProfiler::VisitOMPInterchangeDirective(
VisitOMPLoopTransformationDirective(S);
}
+void StmtProfiler::VisitOMPFuseDirective(const OMPFuseDirective *S) {
+ VisitOMPLoopTransformationDirective(S); // delegates entirely to the shared loop-transformation profiler
+}
+
void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) {
VisitOMPLoopDirective(S);
}
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index 7b90861c78de0..e18867e3c0281 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -702,7 +702,7 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) {
return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse ||
- DKind == OMPD_interchange || DKind == OMPD_stripe;
+ DKind == OMPD_interchange || DKind == OMPD_stripe || DKind == OMPD_fuse;
}
bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) {
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 3562b4ea22a24..4a2dc1a537d46 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -233,6 +233,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
case Stmt::OMPInterchangeDirectiveClass:
EmitOMPInterchangeDirective(cast<OMPInterchangeDirective>(*S));
break;
+ case Stmt::OMPFuseDirectiveClass:
+ EmitOMPFuseDirective(cast<OMPFuseDirective>(*S));
+ break;
case Stmt::OMPForDirectiveClass:
EmitOMPForDirective(cast<OMPForDirective>(*S));
break;
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 803c7ed37635e..0c664b0f89044 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -197,6 +197,8 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
} else if (const auto *Interchange =
dyn_cast<OMPInterchangeDirective>(&S)) {
PreInits = Interchange->getPreInits();
+ } else if (const auto *Fuse = dyn_cast<OMPFuseDirective>(&S)) {
+ PreInits = Fuse->getPreInits();
} else {
llvm_unreachable("Unknown loop-based directive kind.");
}
@@ -2918,6 +2920,12 @@ void CodeGenFunction::EmitOMPInterchangeDirective(
EmitStmt(S.getTransformedStmt());
}
+void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) {
+ // Emit the de-sugared (already fused) statement built by Sema instead of the original loop sequence.
+ OMPTransformDirectiveScopeRAII FuseScope(*this, &S); // scope object kept alive for the duration of the emission
+ EmitStmt(S.getTransformedStmt());
+}
+
void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index c0bc3825f0188..59cb4d9caa98d 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3871,6 +3871,7 @@ class CodeGenFunction : public CodeGenTypeCache {
void EmitOMPUnrollDirective(const OMPUnrollDirective &S);
void EmitOMPReverseDirective(const OMPReverseDirective &S);
void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S);
+ void EmitOMPFuseDirective(const OMPFuseDirective &S);
void EmitOMPForDirective(const OMPForDirective &S);
void EmitOMPForSimdDirective(const OMPForSimdDirective &S);
void EmitOMPScopeDirective(const OMPScopeDirective &S);
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index aaa2bb22565e4..f6ff77937f54b 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1492,6 +1492,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
case Stmt::OMPUnrollDirectiveClass:
case Stmt::OMPReverseDirectiveClass:
case Stmt::OMPInterchangeDirectiveClass:
+ case Stmt::OMPFuseDirectiveClass:
case Stmt::OMPSingleDirectiveClass:
case Stmt::OMPTargetDataDirectiveClass:
case Stmt::OMPTargetDirectiveClass:
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 835dba22a858d..c9885518217f3 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4398,6 +4398,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind,
case OMPD_unroll:
case OMPD_reverse:
case OMPD_interchange:
+ case OMPD_fuse:
case OMPD_assume:
break;
default:
@@ -6209,6 +6210,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc,
EndLoc);
break;
+ case OMPD_fuse:
+ Res =
+ ActOnOpenMPFuseDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc);
+ break;
case OMPD_for:
Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc,
VarsWithInheritedDSA);
@@ -14161,6 +14166,8 @@ bool SemaOpenMP::checkTransformableLoopNest(
DependentPreInits = Dir->getPreInits();
else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform))
DependentPreInits = Dir->getPreInits();
+ else if (auto *Dir = dyn_cast<OMPFuseDirective>(Transform))
+ DependentPreInits = Dir->getPreInits();
else
llvm_unreachable("Unhandled loop transformation");
@@ -14171,6 +14178,265 @@ bool SemaOpenMP::checkTransformableLoopNest(
return Result;
}
+namespace {
+/// Counts the 'for' and range-based 'for' statements visited while traversing
+/// a statement.
+///
+/// The counter accumulates across TraverseStmt() calls made on the same
+/// instance; use a fresh instance to count the loops of a single statement.
+/// TraverseStmt() returns a bool (whether traversal should continue), not the
+/// count, so the result must be read through getNestedLoopCount().
+///
+/// Kept in an anonymous namespace because it is only a helper of this
+/// translation unit.
+class NestedLoopCounterVisitor final
+    : public clang::RecursiveASTVisitor<NestedLoopCounterVisitor> {
+public:
+  /// Counts a classic 'for' statement; always continues the traversal.
+  bool VisitForStmt(clang::ForStmt *) {
+    ++NestedLoopCount;
+    return true;
+  }
+
+  /// Counts a range-based 'for' statement; always continues the traversal.
+  bool VisitCXXForRangeStmt(clang::CXXForRangeStmt *) {
+    ++NestedLoopCount;
+    return true;
+  }
+
+  /// Returns the number of loops visited so far by this instance.
+  unsigned getNestedLoopCount() const { return NestedLoopCount; }
+
+private:
+  unsigned NestedLoopCount = 0;
+};
+} // namespace
+
+/// Checks that \p AStmt is a canonical loop sequence usable by the loop
+/// transformation \p Kind and collects the per-loop analysis results.
+///
+/// On success, \p LoopSeqSize holds the number of top level loop nests and
+/// \p NumLoops the total number of loops found. \p LoopHelpers, \p ForStmts
+/// and \p OriginalInits receive one entry per analyzed top level loop. A
+/// nested loop transformation whose transformed statement is not available
+/// yet (dependent context) is counted in \p LoopSeqSize but contributes no
+/// entry to those vectors.
+///
+/// \return true if no error was diagnosed, false otherwise.
+bool SemaOpenMP::checkTransformableLoopSequence(
+    OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize,
+    unsigned &NumLoops,
+    SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+    SmallVectorImpl<Stmt *> &ForStmts,
+    SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
+    ASTContext &Context) {
+
+  // Number of top level canonical loop nests observed so far, and total number
+  // of loops observed so far. Reset first so that both out-parameters are
+  // defined on every return path, including the early error returns.
+  LoopSeqSize = 0;
+  NumLoops = 0;
+
+  // The associated statement must be a compound statement.
+  if (!isa<CompoundStmt>(AStmt)) {
+    Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
+        << getOpenMPDirectiveName(Kind);
+    return false;
+  }
+
+  VarsWithInheritedDSAType TmpDSA;
+
+  // Callback appending the pre-inits of a nested loop transformation to the
+  // inits of the loop currently being processed.
+  // NOTE(review): OMPStripeDirective is accepted by
+  // OMPLoopTransformationDirective::classof but is not handled here and would
+  // reach the llvm_unreachable below - confirm whether it needs a case.
+  auto OnTransformationCallback =
+      [&OriginalInits](OMPLoopBasedDirective *Transform) {
+        Stmt *DependentPreInits = nullptr;
+        if (auto *Dir = dyn_cast<OMPTileDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
+        else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
+        else if (auto *Dir = dyn_cast<OMPReverseDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
+        else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
+        else if (auto *Dir = dyn_cast<OMPFuseDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
+        else
+          llvm_unreachable("Unhandled loop transformation");
+
+        appendFlattenedStmtList(OriginalInits.back(), DependentPreInits);
+      };
+
+  // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows
+  // the grammar:
+  //
+  // canonical-loop-sequence:
+  //  {
+  //    loop-sequence+
+  //  }
+  // where loop-sequence can be any of the following:
+  // 1. canonical-loop-sequence
+  // 2. loop-nest
+  // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective)
+  //
+  // To recognise and traverse this structure the following helper lambdas
+  // have been defined. handleLoopSequence serves as the recursive entry point
+  // and tries to match the input AST to the canonical loop sequence grammar
+  // structure.
+
+  // Helpers classifying a child statement of the loop sequence.
+  auto isLoopGeneratingStmt = [](const Stmt *Child) {
+    return isa<OMPLoopTransformationDirective>(Child);
+  };
+  auto isLoopSequenceDerivation = [](const Stmt *Child) {
+    return isa<ForStmt>(Child) || isa<CXXForRangeStmt>(Child) ||
+           isa<OMPLoopTransformationDirective>(Child);
+  };
+
+  // Stores the init statement and the loop statement of a ForStmt or a
+  // CXXForRangeStmt. For a ForStmt it also warns when the induction variable
+  // type differs from the one of the first loop seen.
+  QualType BaseInductionVarType;
+  auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType,
+                              Kind, this, &Context](Stmt *LoopStmt) {
+    if (auto *For = dyn_cast<ForStmt>(LoopStmt)) {
+      OriginalInits.back().push_back(For->getInit());
+      ForStmts.push_back(For);
+
+      // Extract the induction variable, if the init declares exactly one.
+      auto *InitStmt = dyn_cast_or_null<DeclStmt>(For->getInit());
+      if (!InitStmt || !InitStmt->isSingleDecl())
+        return;
+      auto *InitDecl = dyn_cast<VarDecl>(InitStmt->getSingleDecl());
+      if (!InitDecl)
+        return;
+
+      QualType InductionVarType = InitDecl->getType().getCanonicalType();
+      if (BaseInductionVarType.isNull()) {
+        // First loop: remember its type as the reference.
+        BaseInductionVarType = InductionVarType;
+      } else if (!Context.hasSameType(BaseInductionVarType,
+                                      InductionVarType)) {
+        // Report against the directive being checked rather than a
+        // hard-coded 'fuse'.
+        Diag(InitDecl->getBeginLoc(),
+             diag::warn_omp_different_loop_ind_var_types)
+            << getOpenMPDirectiveName(Kind) << BaseInductionVarType
+            << InductionVarType;
+      }
+      return;
+    }
+
+    assert(isa<CXXForRangeStmt>(LoopStmt) &&
+           "Expected canonical for or range-based for loops.");
+    auto *CXXFor = cast<CXXForRangeStmt>(LoopStmt);
+    OriginalInits.back().push_back(CXXFor->getBeginStmt());
+    ForStmts.push_back(CXXFor);
+  };
+
+  // Handles a nested loop transformation directive found in the sequence.
+  auto handleLoopGeneration = [&storeLoopStatements, &LoopHelpers,
+                               &OriginalInits, &NumLoops, Kind, &TmpDSA,
+                               &OnTransformationCallback,
+                               this](Stmt *Child) {
+    // The caller only passes loop transformation directives.
+    auto *LoopTransform = cast<OMPLoopTransformationDirective>(Child);
+    Stmt *TransformedStmt = LoopTransform->getTransformedStmt();
+    unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests();
+
+    // A transformation generating no loop nest (e.g. a full unroll) cannot be
+    // part of a loop sequence, whether or not its transformed statement is
+    // available.
+    if (NumGeneratedLoopNests == 0) {
+      Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+          << 0 << getOpenMPDirectiveName(Kind);
+      return false;
+    }
+
+    // The transformed statement is not available due to a dependent context:
+    // accept the directive without analyzing it.
+    if (!TransformedStmt)
+      return true;
+
+    // Future loop transformations that generate multiple canonical loops.
+    if (NumGeneratedLoopNests > 1)
+      llvm_unreachable("Multiple canonical loop generating transformations "
+                       "like loop splitting are not yet supported");
+
+    // Process the transformed loop statement.
+    OriginalInits.emplace_back();
+    LoopHelpers.emplace_back();
+    OnTransformationCallback(LoopTransform);
+
+    // Analyze into the entry just appended. Indexing by the sequence size
+    // would go out of bounds after a dependent transformation was accepted
+    // above without appending an entry.
+    unsigned IsCanonical =
+        checkOpenMPLoop(Kind, nullptr, nullptr, TransformedStmt, SemaRef,
+                        *DSAStack, TmpDSA, LoopHelpers.back());
+
+    if (!IsCanonical) {
+      Diag(TransformedStmt->getBeginLoc(), diag::err_omp_not_canonical_loop)
+          << getOpenMPDirectiveName(Kind);
+      return false;
+    }
+    storeLoopStatements(TransformedStmt);
+    NumLoops += LoopTransform->getNumGeneratedLoops();
+    return true;
+  };
+
+  // Handles a regular 'for' or range-based 'for' loop found in the sequence.
+  auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits,
+                            &NumLoops, Kind, &TmpDSA, this](Stmt *Child) {
+    OriginalInits.emplace_back();
+    LoopHelpers.emplace_back();
+    unsigned IsCanonical =
+        checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack,
+                        TmpDSA, LoopHelpers.back());
+
+    if (!IsCanonical) {
+      Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop)
+          << getOpenMPDirectiveName(Kind);
+      return false;
+    }
+    storeLoopStatements(Child);
+
+    // Count the loops of this nest with a fresh visitor. TraverseStmt()
+    // returns a bool, so the count has to be read from the visitor.
+    NestedLoopCounterVisitor LoopCounter;
+    LoopCounter.TraverseStmt(Child);
+    NumLoops += LoopCounter.getNestedLoopCount();
+    return true;
+  };
+
+  // Processes a loop sequence recursively. The lambda receives itself as
+  // second argument so that it can recurse into nested loop sequences.
+  auto handleLoopSequence = [&](Stmt *LoopSeqStmt,
+                                auto &handleLoopSequenceCallback) -> bool {
+    for (Stmt *Child : LoopSeqStmt->children()) {
+      if (!Child)
+        continue;
+
+      // Look through containers wrapping something that is not directly a
+      // loop or a loop transformation.
+      if (!isLoopSequenceDerivation(Child)) {
+        Child = Child->IgnoreContainers();
+
+        // Nothing left after stripping the containers.
+        if (!Child)
+          continue;
+
+        // In the case of a nested loop sequence ignoring containers would not
+        // be enough, a recursive traversal of the loop sequence is required.
+        if (isa<CompoundStmt>(Child)) {
+          if (!handleLoopSequenceCallback(Child, handleLoopSequenceCallback))
+            return false;
+          // Already been treated, skip this child.
+          continue;
+        }
+      }
+
+      // Anything that is still not a loop or a loop transformation is an
+      // invalid statement inside the canonical loop sequence.
+      if (!isLoopSequenceDerivation(Child)) {
+        Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+            << 0 << getOpenMPDirectiveName(Kind);
+        return false;
+      }
+
+      // Regular loop sequence handling.
+      bool Accepted = isLoopGeneratingStmt(Child) ? handleLoopGeneration(Child)
+                                                  : handleRegularLoop(Child);
+      if (!Accepted)
+        return false;
+      ++LoopSeqSize;
+    }
+    return true;
+  };
+
+  // Recursive entry point to process the main loop sequence.
+  if (!handleLoopSequence(AStmt, handleLoopSequence))
+    return false;
+
+  // A loop sequence must contain at least one loop.
+  if (LoopSeqSize == 0) {
+    Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence)
+        << getOpenMPDirectiveName(Kind);
+    return false;
+  }
+  return true;
+}
+
/// Add preinit statements that need to be propageted from the selected loop.
static void addLoopPreInits(ASTContext &Context,
OMPLoopBasedDirective::HelperExprs &LoopHelper,
@@ -15416,6 +15682,340 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective(
buildPreInits(Context, PreInits));
}
+/// Builds the AST for '#pragma omp fuse'. The loops of the associated
+/// canonical loop sequence are merged into a single loop bounded by the
+/// largest logical iteration count; each original body is guarded by its own
+/// count. In dependent contexts the transformation is deferred.
+StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
+                                                Stmt *AStmt,
+                                                SourceLocation StartLoc,
+                                                SourceLocation EndLoc) {
+  ASTContext &Context = getASTContext();
+  DeclContext *CurrContext = SemaRef.CurContext;
+  Scope *CurScope = SemaRef.getCurScope();
+  CaptureVars CopyTransformer(SemaRef);
+
+  // Ensure the structured block is not empty
+  if (!AStmt)
+    return StmtError();
+  // Validate that the potential loop sequence is transformable for fusion
+  // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops
+  SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers;
+  SmallVector<Stmt *> LoopStmts;
+  SmallVector<SmallVector<Stmt *, 0>> OriginalInits;
+
+  unsigned NumLoops = 0;
+  // TODO: Support looprange clause using LoopSeqSize
+  unsigned LoopSeqSize = 0;
+  if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops,
+                                      LoopHelpers, LoopStmts, OriginalInits,
+                                      Context))
+    return StmtError();
+
+  // Defer transformation in dependent contexts
+  if (CurrContext->isDependentContext())
+    return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
+                                    NumLoops, 1, AStmt, nullptr, nullptr);
+  assert(LoopHelpers.size() == LoopSeqSize &&
+         "Expecting loop iteration space dimensionality to match number of "
+         "affected loops");
+  assert(OriginalInits.size() == LoopSeqSize &&
+         "Expecting loop iteration space dimensionality to match number of "
+         "affected loops");
+
+  // PreInits hold a sequence of variable declarations that must be executed
+  // before the fused loop begins. These include bounds, strides, and other
+  // helper variables required for the transformation.
+  SmallVector<Stmt *> PreInits;
+
+  // Select the type with the largest bit width among all induction variables
+  QualType IVType = LoopHelpers[0].IterationVarRef->getType();
+  for (unsigned int I = 1; I < LoopSeqSize; ++I) {
+    QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType();
+    if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType))
+      IVType = CurrentIVType;
+  }
+  uint64_t IVBitWidth = Context.getIntWidth(IVType);
+
+  // Create pre-init declarations for all loops lower bounds, upper bounds,
+  // strides and num-iterations
+  SmallVector<VarDecl *, 4> LBVarDecls;
+  SmallVector<VarDecl *, 4> STVarDecls;
+  SmallVector<VarDecl *, 4> NIVarDecls;
+  SmallVector<VarDecl *, 4> UBVarDecls;
+  SmallVector<VarDecl *, 4> IVVarDecls;
+
+  // Helper lambda to create variables for bounds, strides, and other
+  // expressions. Generates both the variable declaration and the corresponding
+  // initialization statement.
+  auto CreateHelperVarAndStmt =
+      [&SemaRef = this->SemaRef, &Context, &CopyTransformer,
+       &IVType](Expr *ExprToCopy, const std::string &BaseName, unsigned I,
+                bool NeedsNewVD = false) {
+        Expr *TransformedExpr =
+            AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy));
+        if (!TransformedExpr)
+          return std::pair<VarDecl *, StmtResult>(nullptr, StmtError());
+
+        auto Name = (Twine(".omp.") + BaseName + std::to_string(I)).str();
+
+        VarDecl *VD;
+        if (NeedsNewVD) {
+          VD = buildVarDecl(SemaRef, SourceLocation(), IVType, Name);
+          SemaRef.AddInitializerToDecl(VD, TransformedExpr, false);
+
+        } else {
+          // Reuse the variable referenced by the helper expression and give
+          // it a unique, per-loop name
+          DeclRefExpr *DRE = cast<DeclRefExpr>(TransformedExpr);
+          VD = cast<VarDecl>(DRE->getDecl());
+          VD->setDeclName(&SemaRef.PP.getIdentifierTable().get(Name));
+        }
+        // Create the corresponding declaration statement
+        StmtResult DeclStmt = new (Context) class DeclStmt(
+            DeclGroupRef(VD), SourceLocation(), SourceLocation());
+        return std::make_pair(VD, DeclStmt);
+      };
+
+  // Process each single loop to generate and collect declarations
+  // and statements for all helper expressions
+  for (unsigned int I = 0; I < LoopSeqSize; ++I) {
+    addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
+                    PreInits);
+
+    auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", I);
+    auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", I);
+    auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", I);
+    auto [NIVD, NIDStmt] =
+        CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", I, true);
+    auto [IVVD, IVDStmt] =
+        CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", I);
+
+    if (!UBVD || !LBVD || !STVD || !NIVD || !IVVD)
+      return StmtError();
+
+    UBVarDecls.push_back(UBVD);
+    LBVarDecls.push_back(LBVD);
+    STVarDecls.push_back(STVD);
+    NIVarDecls.push_back(NIVD);
+    IVVarDecls.push_back(IVVD);
+
+    PreInits.push_back(UBDStmt.get());
+    PreInits.push_back(LBDStmt.get());
+    PreInits.push_back(STDStmt.get());
+    PreInits.push_back(NIDStmt.get());
+    PreInits.push_back(IVDStmt.get());
+  }
+
+  auto MakeVarDeclRef = [&SemaRef = this->SemaRef](VarDecl *VD) {
+    return buildDeclRefExpr(SemaRef, VD, VD->getType(), VD->getLocation(),
+                            false);
+  };
+
+  // Next, the final fused loop is created. It has the following shape
+  // (considering the selected loops):
+  //
+  // for (fuse.index = 0; fuse.index < max(ni0, ni1..., nik); ++fuse.index) {
+  //   if (fuse.index < ni0) {
+  //     iv0 = lb0 + st0 * fuse.index;
+  //     original.index0 = iv0
+  //     body(0);
+  //   }
+  //   if (fuse.index < ni1) {
+  //     iv1 = lb1 + st1 * fuse.index;
+  //     original.index1 = iv1
+  //     body(1);
+  //   }
+  //
+  //   ...
+  //
+  //   if (fuse.index < nik) {
+  //     ivk = lbk + stk * fuse.index;
+  //     original.indexk = ivk
+  //     body(k);
+  //   }
+  // }
+
+  // 1. Create the initialized fuse index
+  const std::string IndexName = Twine(".omp.fuse.index").str();
+  Expr *InitVal = IntegerLiteral::Create(Context, llvm::APInt(IVBitWidth, 0),
+                                         IVType, SourceLocation());
+  VarDecl *IndexDecl =
+      buildVarDecl(SemaRef, {}, IVType, IndexName, nullptr, nullptr);
+  SemaRef.AddInitializerToDecl(IndexDecl, InitVal, false);
+  StmtResult InitStmt = new (Context)
+      DeclStmt(DeclGroupRef(IndexDecl), SourceLocation(), SourceLocation());
+
+  if (!InitStmt.isUsable())
+    return StmtError();
+
+  auto MakeIVRef = [&SemaRef = this->SemaRef, IndexDecl, IVType,
+                    Loc = InitVal->getExprLoc()]() {
+    return buildDeclRefExpr(SemaRef, IndexDecl, IVType, Loc, false);
+  };
+
+  // 2. Iteratively compute the max number of logical iterations Max(NI_1, NI_2,
+  // ..., NI_k)
+  //
+  // This loop accumulates the maximum value across multiple expressions,
+  // ensuring each step constructs a unique AST node for correctness. By using
+  // intermediate temporary variables and conditional operators, we maintain
+  // distinct nodes and avoid duplicating subtrees. For instance, max(a,b,c):
+  //   .omp.temp.1   = a
+  //   .omp.temp.2   = .omp.temp.1 > b ? .omp.temp.1 : b
+  //   .omp.fuse.max = .omp.temp.2 > c ? .omp.temp.2 : c
+
+  ExprResult MaxExpr;
+  for (unsigned I = 0; I < LoopSeqSize; ++I) {
+    DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[I]);
+    QualType NITy = NIRef->getType();
+
+    if (MaxExpr.isUnset()) {
+      // Initialize MaxExpr with the first NI expression
+      MaxExpr = NIRef;
+    } else {
+      // Create a new accumulator variable t_i = MaxExpr
+      std::string TempName = (Twine(".omp.temp.") + Twine(I)).str();
+      VarDecl *TempDecl =
+          buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr);
+      TempDecl->setInit(MaxExpr.get());
+      DeclRefExpr *TempRef =
+          buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false);
+      DeclRefExpr *TempRef2 =
+          buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false);
+      // Add a DeclStmt to PreInits to ensure the variable is declared.
+      StmtResult TempStmt = new (Context)
+          DeclStmt(DeclGroupRef(TempDecl), SourceLocation(), SourceLocation());
+
+      if (!TempStmt.isUsable())
+        return StmtError();
+      PreInits.push_back(TempStmt.get());
+
+      // Build MaxExpr <-(MaxExpr > NIRef ? MaxExpr : NIRef)
+      ExprResult Comparison =
+          SemaRef.BuildBinOp(nullptr, SourceLocation(), BO_GT, TempRef, NIRef);
+      // Handle any errors in Comparison creation
+      if (!Comparison.isUsable())
+        return StmtError();
+
+      DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[I]);
+      // Update MaxExpr using a conditional expression to hold the max value
+      MaxExpr = new (Context) ConditionalOperator(
+          Comparison.get(), SourceLocation(), TempRef2, SourceLocation(),
+          NIRef2, NITy, VK_LValue, OK_Ordinary);
+
+      if (!MaxExpr.isUsable())
+        return StmtError();
+    }
+  }
+  if (!MaxExpr.isUsable())
+    return StmtError();
+
+  // 3. Declare the max variable
+  const std::string MaxName = Twine(".omp.fuse.max").str();
+  VarDecl *MaxDecl =
+      buildVarDecl(SemaRef, {}, IVType, MaxName, nullptr, nullptr);
+  MaxDecl->setInit(MaxExpr.get());
+  DeclRefExpr *MaxRef = buildDeclRefExpr(SemaRef, MaxDecl, IVType, {}, false);
+  StmtResult MaxStmt = new (Context)
+      DeclStmt(DeclGroupRef(MaxDecl), SourceLocation(), SourceLocation());
+
+  if (!MaxStmt.isUsable())
+    return StmtError();
+  PreInits.push_back(MaxStmt.get());
+
+  // 4. Create condition Expr: index < n_max
+  ExprResult CondExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT,
+                                           MakeIVRef(), MaxRef);
+  if (!CondExpr.isUsable())
+    return StmtError();
+  // 5. Increment Expr: ++index
+  ExprResult IncrExpr =
+      SemaRef.BuildUnaryOp(CurScope, SourceLocation(), UO_PreInc, MakeIVRef());
+  if (!IncrExpr.isUsable())
+    return StmtError();
+
+  // 6. Build the Fused Loop Body
+  // The final fused loop iterates over the maximum logical range. Inside the
+  // loop, each original loop's index is calculated dynamically, and its body
+  // is executed conditionally.
+  //
+  // Each sub-loop's body is guarded by a conditional statement to ensure
+  // it executes only within its logical iteration range:
+  //
+  //    if (fuse.index < ni_k){
+  //      iv_k = lb_k + st_k * fuse.index;
+  //      original.index = iv_k
+  //      body(k);
+  //    }
+
+  CompoundStmt *FusedBody = nullptr;
+  SmallVector<Stmt *, 4> FusedBodyStmts;
+  for (unsigned I = 0; I < LoopSeqSize; ++I) {
+
+    // Assignment of the original sub-loop index to compute the logical index
+    // IV_k = LB_k + omp.fuse.index * ST_k
+
+    ExprResult IdxExpr =
+        SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul,
+                           MakeVarDeclRef(STVarDecls[I]), MakeIVRef());
+    if (!IdxExpr.isUsable())
+      return StmtError();
+    IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add,
+                                 MakeVarDeclRef(LBVarDecls[I]), IdxExpr.get());
+
+    if (!IdxExpr.isUsable())
+      return StmtError();
+    IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign,
+                                 MakeVarDeclRef(IVVarDecls[I]), IdxExpr.get());
+    if (!IdxExpr.isUsable())
+      return StmtError();
+
+    // Update the original i_k = IV_k
+    SmallVector<Stmt *, 4> BodyStmts;
+    BodyStmts.push_back(IdxExpr.get());
+    llvm::append_range(BodyStmts, LoopHelpers[I].Updates);
+
+    if (auto *SourceCXXFor = dyn_cast<CXXForRangeStmt>(LoopStmts[I]))
+      BodyStmts.push_back(SourceCXXFor->getLoopVarStmt());
+
+    Stmt *Body = (isa<ForStmt>(LoopStmts[I]))
+                     ? cast<ForStmt>(LoopStmts[I])->getBody()
+                     : cast<CXXForRangeStmt>(LoopStmts[I])->getBody();
+
+    BodyStmts.push_back(Body);
+
+    CompoundStmt *CombinedBody =
+        CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(),
+                             SourceLocation(), SourceLocation());
+    ExprResult Condition =
+        SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(),
+                           MakeVarDeclRef(NIVarDecls[I]));
+
+    if (!Condition.isUsable())
+      return StmtError();
+
+    IfStmt *IfStatement = IfStmt::Create(
+        Context, SourceLocation(), IfStatementKind::Ordinary, nullptr, nullptr,
+        Condition.get(), SourceLocation(), SourceLocation(), CombinedBody,
+        SourceLocation(), nullptr);
+
+    FusedBodyStmts.push_back(IfStatement);
+  }
+  FusedBody = CompoundStmt::Create(Context, FusedBodyStmts, FPOptionsOverride(),
+                                   SourceLocation(), SourceLocation());
+
+  // 7. Construct the final fused loop
+  ForStmt *FusedForStmt = new (Context)
+      ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, IncrExpr.get(),
+              FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(),
+              IncrExpr.get()->getEndLoc());
+
+  return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops,
+                                  1, AStmt, FusedForStmt,
+                                  buildPreInits(Context, PreInits));
+}
+
OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind,
Expr *Expr,
SourceLocation StartLoc,
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 8b4b79c6ec039..39082e06a5a0b 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -9663,6 +9663,17 @@ StmtResult TreeTransform<Derived>::TransformOMPInterchangeDirective(
return Res;
}
+template <typename Derived>
+StmtResult
+TreeTransform<Derived>::TransformOMPFuseDirective(OMPFuseDirective *D) {
+  // Run the generic directive transform inside a DSA block for this directive.
+  auto &OMP = getDerived().getSema().OpenMP();
+  OMP.StartOpenMPDSABlock(D->getDirectiveKind(), {}, nullptr, D->getBeginLoc());
+  StmtResult Transformed = getDerived().TransformOMPExecutableDirective(D);
+  OMP.EndOpenMPDSABlock(Transformed.get());
+  return Transformed;
+}
+
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPForDirective(OMPForDirective *D) {
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index f41cfcc53a35d..aee052404874c 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2449,6 +2449,7 @@ void ASTStmtReader::VisitOMPLoopTransformationDirective(
OMPLoopTransformationDirective *D) {
VisitOMPLoopBasedDirective(D);
D->setNumGeneratedLoops(Record.readUInt32());
+ D->setNumGeneratedLoopNests(Record.readUInt32());
}
void ASTStmtReader::VisitOMPTileDirective(OMPTileDirective *D) {
@@ -2471,6 +2472,10 @@ void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) {
VisitOMPLoopTransformationDirective(D);
}
+void ASTStmtReader::VisitOMPFuseDirective(OMPFuseDirective *D) {
+ VisitOMPLoopTransformationDirective(D); // No fuse-specific fields are read.
+}
+
void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) {
VisitOMPLoopDirective(D);
D->setHasCancel(Record.readBool());
@@ -3613,6 +3618,12 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
S = OMPReverseDirective::CreateEmpty(Context);
break;
}
+ case STMT_OMP_FUSE_DIRECTIVE: {
+ unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
+ unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];
+ S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops);
+ break;
+ }
case STMT_OMP_INTERCHANGE_DIRECTIVE: {
unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index b9eabd5ddb64c..8b909d5c93686 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2454,6 +2454,7 @@ void ASTStmtWriter::VisitOMPLoopTransformationDirective(
OMPLoopTransformationDirective *D) {
VisitOMPLoopBasedDirective(D);
Record.writeUInt32(D->getNumGeneratedLoops());
+ Record.writeUInt32(D->getNumGeneratedLoopNests());
}
void ASTStmtWriter::VisitOMPTileDirective(OMPTileDirective *D) {
@@ -2481,6 +2482,11 @@ void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) {
Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE;
}
+void ASTStmtWriter::VisitOMPFuseDirective(OMPFuseDirective *D) {
+ VisitOMPLoopTransformationDirective(D); // No fuse-specific fields written.
+ Code = serialization::STMT_OMP_FUSE_DIRECTIVE; // Record code for this node.
+}
+
void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) {
VisitOMPLoopDirective(D);
Record.writeBool(D->hasCancel());
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 86e2e8f634bfd..457a6daf061b0 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1818,6 +1818,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
case Stmt::OMPStripeDirectiveClass:
case Stmt::OMPTileDirectiveClass:
case Stmt::OMPInterchangeDirectiveClass:
+ case Stmt::OMPFuseDirectiveClass:
case Stmt::OMPInteropDirectiveClass:
case Stmt::OMPDispatchDirectiveClass:
case Stmt::OMPMaskedDirectiveClass:
diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp
new file mode 100644
index 0000000000000..43ce815dab024
--- /dev/null
+++ b/clang/test/OpenMP/fuse_ast_print.cpp
@@ -0,0 +1,278 @@
+// Check no warnings/errors
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+// Check AST and unparsing
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -emit-pch -o %t %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code
+extern "C" void body(...);
+
+// PRINT-LABEL: void foo1(
+// DUMP-LABEL: FunctionDecl {{.*}} foo1
+void foo1() { // Three for-loops: strided up-count, down-count, inclusive bound.
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+
+ }
+
+}
+
+// PRINT-LABEL: void foo2(
+// DUMP-LABEL: FunctionDecl {{.*}} foo2
+void foo2() { // A fuse region nested directly under a partial unroll.
+ // PRINT: #pragma omp unroll partial(4)
+ // DUMP: OMPUnrollDirective
+ // DUMP-NEXT: OMPPartialClause
+ // DUMP-NEXT: ConstantExpr
+ // DUMP-NEXT: value: Int 4
+ // DUMP-NEXT: IntegerLiteral {{.*}} 4
+ #pragma omp unroll partial(4)
+ // PRINT: #pragma omp fuse
+ // DUMP-NEXT: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ }
+
+}
+
+//PRINT-LABEL: void foo3(
+//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo3
+template<int Factor1, int Factor2>
+void foo3() { // Fuses two partially unrolled loops; factors are template args.
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: #pragma omp unroll partial(Factor1)
+ // DUMP: OMPUnrollDirective
+ #pragma omp unroll partial(Factor1)
+ // PRINT: for (int i = 0; i < 12; i += 1)
+ // DUMP: ForStmt
+ for (int i = 0; i < 12; i += 1)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: #pragma omp unroll partial(Factor2)
+ // DUMP: OMPUnrollDirective
+ #pragma omp unroll partial(Factor2)
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+
+ }
+}
+
+// Also test instantiating the template.
+void tfoo3() {
+ foo3<4,2>(); // Factor1 = 4, Factor2 = 2
+}
+
+//PRINT-LABEL: void foo4(
+//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo4
+template<typename T, T Step>
+void foo4(int start, int end) { // Loop variable type and step are template args.
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (T i = start; i < end; i += Step)
+ // DUMP: ForStmt
+ for (T i = start; i < end; i += Step)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+
+ // PRINT: for (T j = end; j > start; j -= Step)
+ // DUMP: ForStmt
+ for (T j = end; j > start; j -= Step) {
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ }
+
+ }
+}
+
+// Also test instantiating the template.
+void tfoo4() {
+ foo4<int, 4>(0, 64); // T = int, Step = 4, start = 0, end = 64
+}
+
+
+
+// PRINT-LABEL: void foo5(
+// DUMP-LABEL: FunctionDecl {{.*}} foo5
+void foo5() { // Range-based for loops over arrays, one with an init-statement.
+ double arr[128], arr2[128];
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT-NEXT: for (auto &&a : arr)
+ // DUMP-NEXT: CXXForRangeStmt
+ for (auto &&a: arr)
+ // PRINT: body(a)
+ // DUMP: CallExpr
+ body(a);
+ // PRINT: for (double v = 42; auto &&b : arr)
+ // DUMP: CXXForRangeStmt
+ for (double v = 42; auto &&b: arr)
+ // PRINT: body(b, v);
+ // DUMP: CallExpr
+ body(b, v);
+ // PRINT: for (auto &&c : arr2)
+ // DUMP: CXXForRangeStmt
+ for (auto &&c: arr2)
+ // PRINT: body(c)
+ // DUMP: CallExpr
+ body(c);
+
+ }
+
+}
+
+// PRINT-LABEL: void foo6(
+// DUMP-LABEL: FunctionDecl {{.*}} foo6
+void foo6() { // Outer fuse over a nested fuse plus a partially unrolled loop.
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i <= 10; ++i)
+ // DUMP: ForStmt
+ for (int i = 0; i <= 10; ++i)
+ body(i);
+ // PRINT: for (int j = 0; j < 100; ++j)
+ // DUMP: ForStmt
+ for(int j = 0; j < 100; ++j)
+ body(j);
+ }
+ // PRINT: #pragma omp unroll partial(4)
+ // DUMP: OMPUnrollDirective
+ #pragma omp unroll partial(4)
+ // PRINT: for (int k = 0; k < 250; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k < 250; ++k)
+ body(k);
+ }
+}
+
+// PRINT-LABEL: void foo7(
+// DUMP-LABEL: FunctionDecl {{.*}} foo7
+void foo7() { // Loops wrapped in several levels of nested compound statements.
+ // PRINT: #pragma omp fuse
+ // DUMP: OMPFuseDirective
+ #pragma omp fuse
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ }
+ }
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+ }
+ }
+ }
+ }
+
+}
+
+
+
+
+
+#endif
\ No newline at end of file
diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp
new file mode 100644
index 0000000000000..6c1e21092da43
--- /dev/null
+++ b/clang/test/OpenMP/fuse_codegen.cpp
@@ -0,0 +1,1511 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5
+// expected-no-diagnostics
+
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2
+
+#ifndef HEADER
+#define HEADER
+
+//placeholder for loop body code.
+extern "C" void body(...) {}
+
+extern "C" void foo1(int start1, int end1, int step1, int start2, int end2, int step2) {
+ int i,j; // Induction variables are declared outside the fused loops.
+ #pragma omp fuse
+ {
+ for(i = start1; i < end1; i += step1) body(i);
+ for(j = start2; j < end2; j += step2) body(j);
+ }
+
+}
+
+template <typename T>
+void foo2(T start, T end, T step){
+ T i,j,k; // Induction variables of the template type, declared outside the loops.
+ #pragma omp fuse
+ {
+ for(i = start; i < end; i += step) body(i);
+ for(j = end; j > start; j -= step) body(j);
+ for(k = start+step; k < end+step; k += step) body(k);
+ }
+}
+
+extern "C" void tfoo2() {
+ foo2<int>(0, 64, 4); // Instantiates foo2 with T = int.
+}
+
+extern "C" void foo3() {
+ double arr[256]; // Iterated by both range-based for loops below.
+ #pragma omp fuse
+ {
+ #pragma omp fuse
+ {
+ for(int i = 0; i < 128; ++i) body(i);
+ for(int j = 0; j < 256; j+=2) body(j);
+ }
+ for(int c = 42; auto &&v: arr) body(c,v);
+ for(int cc = 37; auto &&vv: arr) body(cc, vv);
+ }
+}
+
+
+#endif
+// CHECK1-LABEL: define dso_local void @body(
+// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo1(
+// CHECK1-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]]
+// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]]
+// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]]
+// CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP35]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]]
+// CHECK1-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
+// CHECK1: [[IF_THEN22]]:
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]]
+// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]]
+// CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK1-NEXT: br label %[[IF_END27]]
+// CHECK1: [[IF_END27]]:
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @tfoo2(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4)
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_(
+// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]]
+// CHECK1-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1
+// CHECK1-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1
+// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]]
+// CHECK1-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
+// CHECK1: [[COND_TRUE30]]:
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT: br label %[[COND_END32:.*]]
+// CHECK1: [[COND_FALSE31]]:
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: br label %[[COND_END32]]
+// CHECK1: [[COND_END32]]:
+// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ]
+// CHECK1-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]]
+// CHECK1-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]]
+// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]]
+// CHECK1-NEXT: store i32 [[ADD38]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
+// CHECK1: [[IF_THEN40]]:
+// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]]
+// CHECK1-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]]
+// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]]
+// CHECK1-NEXT: store i32 [[SUB44]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP61]])
+// CHECK1-NEXT: br label %[[IF_END45]]
+// CHECK1: [[IF_END45]]:
+// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]]
+// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK1: [[IF_THEN47]]:
+// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]]
+// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
+// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]]
+// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]]
+// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP70]])
+// CHECK1-NEXT: br label %[[IF_END52]]
+// CHECK1: [[IF_END52]]:
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo3(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END224:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8
+// CHECK1-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64
+// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1
+// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1
+// CHECK1-NEXT: [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1
+// CHECK1-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1
+// CHECK1-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8
+// CHECK1-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8
+// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64
+// CHECK1-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64
+// CHECK1-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]]
+// CHECK1-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8
+// CHECK1-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1
+// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1
+// CHECK1-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1
+// CHECK1-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1
+// CHECK1-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK1-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1
+// CHECK1-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
+// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]]
+// CHECK1-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]]
+// CHECK1: [[COND_TRUE44]]:
+// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK1-NEXT: br label %[[COND_END46:.*]]
+// CHECK1: [[COND_FALSE45]]:
+// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT: br label %[[COND_END46]]
+// CHECK1: [[COND_END46]]:
+// CHECK1-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ]
+// CHECK1-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]]
+// CHECK1-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]]
+// CHECK1: [[COND_TRUE50]]:
+// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: br label %[[COND_END52:.*]]
+// CHECK1: [[COND_FALSE51]]:
+// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: br label %[[COND_END52]]
+// CHECK1: [[COND_END52]]:
+// CHECK1-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ]
+// CHECK1-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8
+// CHECK1-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
+// CHECK1-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]]
+// CHECK1-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4
+// CHECK1-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4
+// CHECK1-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64
+// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]]
+// CHECK1-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]]
+// CHECK1-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32
+// CHECK1-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4
+// CHECK1-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1
+// CHECK1-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]]
+// CHECK1-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN64]]:
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]]
+// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]]
+// CHECK1-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1
+// CHECK1-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]]
+// CHECK1-NEXT: store i32 [[ADD68]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP48]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]]
+// CHECK1-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]]
+// CHECK1: [[IF_THEN70]]:
+// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]]
+// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]]
+// CHECK1-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2
+// CHECK1-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]]
+// CHECK1-NEXT: store i32 [[ADD74]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP55]])
+// CHECK1-NEXT: br label %[[IF_END75]]
+// CHECK1: [[IF_END75]]:
+// CHECK1-NEXT: br label %[[IF_END76]]
+// CHECK1: [[IF_END76]]:
+// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]]
+// CHECK1: [[IF_THEN78]]:
+// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8
+// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8
+// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]]
+// CHECK1-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]]
+// CHECK1-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8
+// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8
+// CHECK1-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1
+// CHECK1-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]]
+// CHECK1-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: store ptr [[TMP63]], ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP64]], double noundef [[TMP66]])
+// CHECK1-NEXT: br label %[[IF_END83]]
+// CHECK1: [[IF_END83]]:
+// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]]
+// CHECK1-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]]
+// CHECK1: [[IF_THEN85]]:
+// CHECK1-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK1-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]]
+// CHECK1-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]]
+// CHECK1-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1
+// CHECK1-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]]
+// CHECK1-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8
+// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8
+// CHECK1-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP75]], double noundef [[TMP77]])
+// CHECK1-NEXT: br label %[[IF_END90]]
+// CHECK1: [[IF_END90]]:
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1
+// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @body(
+// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo1(
+// CHECK2-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]]
+// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]]
+// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]]
+// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]]
+// CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP35]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]]
+// CHECK2-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
+// CHECK2: [[IF_THEN22]]:
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]]
+// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]]
+// CHECK2-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]]
+// CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK2-NEXT: br label %[[IF_END27]]
+// CHECK2: [[IF_END27]]:
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo3(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END224:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8
+// CHECK2-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64
+// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1
+// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1
+// CHECK2-NEXT: [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1
+// CHECK2-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8
+// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8
+// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
+// CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1
+// CHECK2-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8
+// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8
+// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8
+// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8
+// CHECK2-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8
+// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8
+// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64
+// CHECK2-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]]
+// CHECK2-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8
+// CHECK2-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1
+// CHECK2-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1
+// CHECK2-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1
+// CHECK2-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1
+// CHECK2-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK2-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
+// CHECK2-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1
+// CHECK2-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
+// CHECK2-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]]
+// CHECK2-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]]
+// CHECK2: [[COND_TRUE44]]:
+// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
+// CHECK2-NEXT: br label %[[COND_END46:.*]]
+// CHECK2: [[COND_FALSE45]]:
+// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT: br label %[[COND_END46]]
+// CHECK2: [[COND_END46]]:
+// CHECK2-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ]
+// CHECK2-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]]
+// CHECK2-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]]
+// CHECK2: [[COND_TRUE50]]:
+// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: br label %[[COND_END52:.*]]
+// CHECK2: [[COND_FALSE51]]:
+// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: br label %[[COND_END52]]
+// CHECK2: [[COND_END52]]:
+// CHECK2-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ]
+// CHECK2-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8
+// CHECK2-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
+// CHECK2-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]]
+// CHECK2-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4
+// CHECK2-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4
+// CHECK2-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64
+// CHECK2-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]]
+// CHECK2-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]]
+// CHECK2-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32
+// CHECK2-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4
+// CHECK2-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1
+// CHECK2-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]]
+// CHECK2-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN64]]:
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]]
+// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]]
+// CHECK2-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1
+// CHECK2-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]]
+// CHECK2-NEXT: store i32 [[ADD68]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP48]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]]
+// CHECK2-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]]
+// CHECK2: [[IF_THEN70]]:
+// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]]
+// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]]
+// CHECK2-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2
+// CHECK2-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]]
+// CHECK2-NEXT: store i32 [[ADD74]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP55]])
+// CHECK2-NEXT: br label %[[IF_END75]]
+// CHECK2: [[IF_END75]]:
+// CHECK2-NEXT: br label %[[IF_END76]]
+// CHECK2: [[IF_END76]]:
+// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]]
+// CHECK2: [[IF_THEN78]]:
+// CHECK2-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8
+// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8
+// CHECK2-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]]
+// CHECK2-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]]
+// CHECK2-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8
+// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
+// CHECK2-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8
+// CHECK2-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1
+// CHECK2-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]]
+// CHECK2-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: store ptr [[TMP63]], ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP64]], double noundef [[TMP66]])
+// CHECK2-NEXT: br label %[[IF_END83]]
+// CHECK2: [[IF_END83]]:
+// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]]
+// CHECK2-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]]
+// CHECK2: [[IF_THEN85]]:
+// CHECK2-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]]
+// CHECK2-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]]
+// CHECK2-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1
+// CHECK2-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]]
+// CHECK2-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8
+// CHECK2-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8
+// CHECK2-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP75]], double noundef [[TMP77]])
+// CHECK2-NEXT: br label %[[IF_END90]]
+// CHECK2: [[IF_END90]]:
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1
+// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @tfoo2(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4)
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_(
+// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK2-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]]
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]]
+// CHECK2-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1
+// CHECK2-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1
+// CHECK2-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]]
+// CHECK2-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
+// CHECK2: [[COND_TRUE30]]:
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: br label %[[COND_END32:.*]]
+// CHECK2: [[COND_FALSE31]]:
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: br label %[[COND_END32]]
+// CHECK2: [[COND_END32]]:
+// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ]
+// CHECK2-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]]
+// CHECK2-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]]
+// CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]]
+// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]]
+// CHECK2-NEXT: store i32 [[ADD38]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
+// CHECK2: [[IF_THEN40]]:
+// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]]
+// CHECK2-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]]
+// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]]
+// CHECK2-NEXT: store i32 [[SUB44]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP61]])
+// CHECK2-NEXT: br label %[[IF_END45]]
+// CHECK2: [[IF_END45]]:
+// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]]
+// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK2: [[IF_THEN47]]:
+// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]]
+// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]]
+// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]]
+// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP70]])
+// CHECK2-NEXT: br label %[[IF_END52]]
+// CHECK2: [[IF_END52]]:
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: ret void
+//
+//.
+// CHECK1: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"}
+// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
+// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+//.
+// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"}
+// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
+// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+//.
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp
new file mode 100644
index 0000000000000..50dedfd2c0dc6
--- /dev/null
+++ b/clang/test/OpenMP/fuse_messages.cpp
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s
+
+void func() {
+
+ // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}}
+ #pragma omp fuse
+ ;
+
+ // expected-error@+2 {{statement after '#pragma omp fuse' must be a for loop}}
+ #pragma omp fuse
+ {int bar = 0;}
+
+ // expected-error@+4 {{statement after '#pragma omp fuse' must be a for loop}}
+ #pragma omp fuse
+ {
+ for(int i = 0; i < 10; ++i);
+ int x = 2;
+ }
+
+ // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}}
+ #pragma omp fuse
+ #pragma omp for
+ for (int i = 0; i < 7; ++i)
+ ;
+
+ {
+ // expected-error@+2 {{expected statement}}
+ #pragma omp fuse
+ }
+
+ // expected-warning@+1 {{extra tokens at the end of '#pragma omp fuse' are ignored}}
+ #pragma omp fuse foo
+ {
+ for (int i = 0; i < 7; ++i)
+ ;
+ }
+
+
+ // expected-error@+1 {{unexpected OpenMP clause 'final' in directive '#pragma omp fuse'}}
+ #pragma omp fuse final(0)
+ {
+ for (int i = 0; i < 7; ++i)
+ ;
+ }
+
+ //expected-error@+4 {{loop after '#pragma omp fuse' is not in canonical form}}
+ //expected-error@+3 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'i'}}
+ #pragma omp fuse
+ {
+ for(int i = 0; i < 10; i*=2) {
+ ;
+ }
+ }
+
+ //expected-error@+2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}}
+ #pragma omp fuse
+ {}
+
+ //expected-error@+3 {{statement after '#pragma omp fuse' must be a for loop}}
+ #pragma omp fuse
+ {
+ #pragma omp unroll full
+ for(int i = 0; i < 10; ++i);
+
+ for(int j = 0; j < 10; ++j);
+ }
+
+ //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'unsigned int'}}
+ //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'long long'}}
+ #pragma omp fuse
+ {
+ for(int i = 0; i < 10; ++i);
+ for(unsigned int j = 0; j < 10; ++j);
+ for(long long k = 0; k < 100; ++k);
+ }
+}
\ No newline at end of file
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index fa5df3b5a06e6..80020763961fc 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2206,6 +2206,7 @@ class EnqueueVisitor : public ConstStmtVisitor<EnqueueVisitor, void>,
void VisitOMPUnrollDirective(const OMPUnrollDirective *D);
void VisitOMPReverseDirective(const OMPReverseDirective *D);
void VisitOMPInterchangeDirective(const OMPInterchangeDirective *D);
+ void VisitOMPFuseDirective(const OMPFuseDirective *D);
void VisitOMPForDirective(const OMPForDirective *D);
void VisitOMPForSimdDirective(const OMPForSimdDirective *D);
void VisitOMPSectionsDirective(const OMPSectionsDirective *D);
@@ -3364,6 +3365,10 @@ void EnqueueVisitor::VisitOMPInterchangeDirective(
VisitOMPLoopTransformationDirective(D);
}
+void EnqueueVisitor::VisitOMPFuseDirective(const OMPFuseDirective *D) {
+ VisitOMPLoopTransformationDirective(D);
+}
+
void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) {
VisitOMPLoopDirective(D);
}
@@ -6318,6 +6323,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) {
return cxstring::createRef("OMPReverseDirective");
case CXCursor_OMPInterchangeDirective:
return cxstring::createRef("OMPInterchangeDirective");
+ case CXCursor_OMPFuseDirective:
+ return cxstring::createRef("OMPFuseDirective");
case CXCursor_OMPForDirective:
return cxstring::createRef("OMPForDirective");
case CXCursor_OMPForSimdDirective:
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index 635d03a88d105..709fa60d28d8d 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -688,6 +688,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
case Stmt::OMPInterchangeDirectiveClass:
K = CXCursor_OMPInterchangeDirective;
break;
+ case Stmt::OMPFuseDirectiveClass:
+ K = CXCursor_OMPFuseDirective;
+ break;
case Stmt::OMPForDirectiveClass:
K = CXCursor_OMPForDirective;
break;
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 194b1e657c493..f33b3b1532d3d 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -842,6 +842,10 @@ def OMP_For : Directive<"for"> {
let association = AS_Loop;
let category = CA_Executable;
}
+def OMP_Fuse : Directive<"fuse"> {
+ let association = AS_Loop;
+ let category = CA_Executable;
+}
def OMP_Interchange : Directive<"interchange"> {
let allowedOnceClauses = [
VersionedClause<OMPC_Permutation>,
diff --git a/openmp/runtime/test/transform/fuse/foreach.cpp b/openmp/runtime/test/transform/fuse/foreach.cpp
new file mode 100644
index 0000000000000..cabf4bf8a511d
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/foreach.cpp
@@ -0,0 +1,192 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdarg>
+#include <cstdio>
+#include <vector>
+
+struct Reporter {
+ const char *name;
+
+ Reporter(const char *name) : name(name) { print("ctor"); }
+
+ Reporter() : name("<anon>") { print("ctor"); }
+
+ Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+ Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+ ~Reporter() { print("dtor"); }
+
+ const Reporter &operator=(const Reporter &that) {
+ print("copy assign");
+ this->name = that.name;
+ return *this;
+ }
+
+ const Reporter &operator=(Reporter &&that) {
+ print("move assign");
+ this->name = that.name;
+ return *this;
+ }
+
+ struct Iterator {
+ const Reporter *owner;
+ int pos;
+
+ Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+ Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+ owner->print("iterator copy ctor");
+ }
+
+ Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+ owner->print("iterator move ctor");
+ }
+
+ ~Iterator() { owner->print("iterator dtor"); }
+
+ const Iterator &operator=(const Iterator &that) {
+ owner->print("iterator copy assign");
+ this->owner = that.owner;
+ this->pos = that.pos;
+ return *this;
+ }
+
+ const Iterator &operator=(Iterator &&that) {
+ owner->print("iterator move assign");
+ this->owner = that.owner;
+ this->pos = that.pos;
+ return *this;
+ }
+
+ bool operator==(const Iterator &that) const {
+ owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+ return this->pos == that.pos;
+ }
+
+ Iterator &operator++() {
+ owner->print("iterator prefix ++");
+ pos -= 1;
+ return *this;
+ }
+
+ Iterator operator++(int) {
+ owner->print("iterator postfix ++");
+ auto result = *this;
+ pos -= 1;
+ return result;
+ }
+
+ int operator*() const {
+ int result = 2 - pos;
+ owner->print("iterator deref: %i", result);
+ return result;
+ }
+
+ size_t operator-(const Iterator &that) const {
+ int result = (2 - this->pos) - (2 - that.pos);
+ owner->print("iterator distance: %d", result);
+ return result;
+ }
+
+ Iterator operator+(int steps) const {
+ owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+ return Iterator(owner, pos - steps);
+ }
+
+ void print(const char *msg) const { owner->print(msg); }
+ };
+
+ Iterator begin() const {
+ print("begin()");
+ return Iterator(this, 2);
+ }
+
+ Iterator end() const {
+ print("end()");
+ return Iterator(this, -1);
+ }
+
+ void print(const char *msg, ...) const {
+ va_list args;
+ va_start(args, msg);
+ printf("[%s] ", name);
+ vprintf(msg, args);
+ printf("\n");
+ va_end(args);
+ }
+};
+
+int main() {
+ printf("do\n");
+#pragma omp fuse
+ {
+ for (Reporter a{"C"}; auto &&v : Reporter("A"))
+ printf("v=%d\n", v);
+ for (Reporter aa{"D"}; auto &&vv : Reporter("B"))
+ printf("vv=%d\n", vv);
+ }
+ printf("done\n");
+ return EXIT_SUCCESS;
+}
+
+// CHECK: [C] ctor
+// CHECK-NEXT: [A] ctor
+// CHECK-NEXT: [A] end()
+// CHECK-NEXT: [A] begin()
+// CHECK-NEXT: [A] begin()
+// CHECK-NEXT: [A] iterator distance: 3
+// CHECK-NEXT: [D] ctor
+// CHECK-NEXT: [B] ctor
+// CHECK-NEXT: [B] end()
+// CHECK-NEXT: [B] begin()
+// CHECK-NEXT: [B] begin()
+// CHECK-NEXT: [B] iterator distance: 3
+// CHECK-NEXT: [A] iterator advance: 0 += 0
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 0
+// CHECK-NEXT: v=0
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 0
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 0
+// CHECK-NEXT: vv=0
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator advance: 0 += 1
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 1
+// CHECK-NEXT: v=1
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 1
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 1
+// CHECK-NEXT: vv=1
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator advance: 0 += 2
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 2
+// CHECK-NEXT: v=2
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 2
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 2
+// CHECK-NEXT: vv=2
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] dtor
+// CHECK-NEXT: [D] dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] dtor
+// CHECK-NEXT: [C] dtor
+// CHECK-NEXT: done
+
+
+#endif
diff --git a/openmp/runtime/test/transform/fuse/intfor.c b/openmp/runtime/test/transform/fuse/intfor.c
new file mode 100644
index 0000000000000..b8171b4df7042
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/intfor.c
@@ -0,0 +1,50 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+ printf("do\n");
+#pragma omp fuse
+ {
+ for (int i = 5; i <= 25; i += 5)
+ printf("i=%d\n", i);
+ for (int j = 10; j < 100; j += 10)
+ printf("j=%d\n", j);
+ for (int k = 10; k > 0; --k)
+ printf("k=%d\n", k);
+ }
+ printf("done\n");
+ return EXIT_SUCCESS;
+}
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: i=5
+// CHECK-NEXT: j=10
+// CHECK-NEXT: k=10
+// CHECK-NEXT: i=10
+// CHECK-NEXT: j=20
+// CHECK-NEXT: k=9
+// CHECK-NEXT: i=15
+// CHECK-NEXT: j=30
+// CHECK-NEXT: k=8
+// CHECK-NEXT: i=20
+// CHECK-NEXT: j=40
+// CHECK-NEXT: k=7
+// CHECK-NEXT: i=25
+// CHECK-NEXT: j=50
+// CHECK-NEXT: k=6
+// CHECK-NEXT: j=60
+// CHECK-NEXT: k=5
+// CHECK-NEXT: j=70
+// CHECK-NEXT: k=4
+// CHECK-NEXT: j=80
+// CHECK-NEXT: k=3
+// CHECK-NEXT: j=90
+// CHECK-NEXT: k=2
+// CHECK-NEXT: k=1
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/fuse/iterfor.cpp b/openmp/runtime/test/transform/fuse/iterfor.cpp
new file mode 100644
index 0000000000000..552484b2981c4
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/iterfor.cpp
@@ -0,0 +1,194 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdarg>
+#include <cstdio>
+#include <vector>
+
+// Test fixture: a range-like object that logs every special member function
+// and every iterator operation to stdout, prefixed with "[<name>] ". The
+// expected-output lines at the end of this file pin the exact call order.
+struct Reporter {
+ // Label printed in front of every message emitted by this object and by the
+ // iterators it hands out.
+ const char *name;
+
+ Reporter(const char *name) : name(name) { print("ctor"); }
+
+ Reporter() : name("<anon>") { print("ctor"); }
+
+ Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+ Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+ ~Reporter() { print("dtor"); }
+
+ const Reporter &operator=(const Reporter &that) {
+ print("copy assign");
+ this->name = that.name;
+ return *this;
+ }
+
+ const Reporter &operator=(Reporter &&that) {
+ print("move assign");
+ this->name = that.name;
+ return *this;
+ }
+
+ // Iterator over the three logical elements 0, 1, 2. Internally 'pos' counts
+ // DOWN from 2 (begin) to -1 (end); every message reports the logical index
+ // '2 - pos' instead of the raw position.
+ struct Iterator {
+ // Reporter whose print() is used for logging.
+ const Reporter *owner;
+ // Raw position: 2 at begin(), -1 at end().
+ int pos;
+
+ // The only constructor that does not log anything.
+ Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+ Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+ owner->print("iterator copy ctor");
+ }
+
+ Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+ owner->print("iterator move ctor");
+ }
+
+ ~Iterator() { owner->print("iterator dtor"); }
+
+ // Logs through the current owner before taking over that.owner.
+ const Iterator &operator=(const Iterator &that) {
+ owner->print("iterator copy assign");
+ this->owner = that.owner;
+ this->pos = that.pos;
+ return *this;
+ }
+
+ // Logs through the current owner before taking over that.owner.
+ const Iterator &operator=(Iterator &&that) {
+ owner->print("iterator move assign");
+ this->owner = that.owner;
+ this->pos = that.pos;
+ return *this;
+ }
+
+ bool operator==(const Iterator &that) const {
+ owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+ return this->pos == that.pos;
+ }
+
+ bool operator!=(const Iterator &that) const {
+ owner->print("iterator %d != %d", 2 - this->pos, 2 - that.pos);
+ return this->pos != that.pos;
+ }
+
+ // Advancing moves to the next logical element, i.e. decrements 'pos'.
+ Iterator &operator++() {
+ owner->print("iterator prefix ++");
+ pos -= 1;
+ return *this;
+ }
+
+ // Copies *this (which logs a copy construction) before advancing.
+ Iterator operator++(int) {
+ owner->print("iterator postfix ++");
+ auto result = *this;
+ pos -= 1;
+ return result;
+ }
+
+ // Dereferencing yields the logical index itself.
+ int operator*() const {
+ int result = 2 - pos;
+ owner->print("iterator deref: %i", result);
+ return result;
+ }
+
+ // Distance in logical-index units; the int result is converted to size_t
+ // on return.
+ size_t operator-(const Iterator &that) const {
+ int result = (2 - this->pos) - (2 - that.pos);
+ owner->print("iterator distance: %d", result);
+ return result;
+ }
+
+ // Returns a new iterator 'steps' logical elements further on; *this is
+ // left unchanged.
+ Iterator operator+(int steps) const {
+ owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+ return Iterator(owner, pos - steps);
+ }
+ };
+
+ // Iterator to logical element 0 (raw position 2).
+ Iterator begin() const {
+ print("begin()");
+ return Iterator(this, 2);
+ }
+
+ // Past-the-end iterator (raw position -1, logical index 3).
+ Iterator end() const {
+ print("end()");
+ return Iterator(this, -1);
+ }
+
+ // printf-style logger: writes "[<name>] ", the formatted message and a
+ // newline to stdout.
+ void print(const char *msg, ...) const {
+ va_list args;
+ va_start(args, msg);
+ printf("[%s] ", name);
+ vprintf(msg, args);
+ printf("\n");
+ va_end(args);
+ }
+};
+
+// Fuses two iterator-based loops over the Reporter objects C and D, each of
+// which exposes three elements. The expected output below alternates one
+// iteration of the first loop ("v=") with one of the second ("vv=").
+int main() {
+ printf("do\n");
+ Reporter C("C");
+ Reporter D("D");
+#pragma omp fuse
+ {
+ for (auto it = C.begin(); it != C.end(); ++it)
+ printf("v=%d\n", *it);
+
+ for (auto it = D.begin(); it != D.end(); ++it)
+ printf("vv=%d\n", *it);
+ }
+ printf("done\n");
+ return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK: [C] ctor
+// CHECK-NEXT: [D] ctor
+// CHECK-NEXT: [C] begin()
+// CHECK-NEXT: [C] begin()
+// CHECK-NEXT: [C] end()
+// CHECK-NEXT: [C] iterator distance: 3
+// CHECK-NEXT: [D] begin()
+// CHECK-NEXT: [D] begin()
+// CHECK-NEXT: [D] end()
+// CHECK-NEXT: [D] iterator distance: 3
+// CHECK-NEXT: [C] iterator advance: 0 += 0
+// CHECK-NEXT: [C] iterator move assign
+// CHECK-NEXT: [C] iterator deref: 0
+// CHECK-NEXT: v=0
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] iterator advance: 0 += 0
+// CHECK-NEXT: [D] iterator move assign
+// CHECK-NEXT: [D] iterator deref: 0
+// CHECK-NEXT: vv=0
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator advance: 0 += 1
+// CHECK-NEXT: [C] iterator move assign
+// CHECK-NEXT: [C] iterator deref: 1
+// CHECK-NEXT: v=1
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] iterator advance: 0 += 1
+// CHECK-NEXT: [D] iterator move assign
+// CHECK-NEXT: [D] iterator deref: 1
+// CHECK-NEXT: vv=1
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator advance: 0 += 2
+// CHECK-NEXT: [C] iterator move assign
+// CHECK-NEXT: [C] iterator deref: 2
+// CHECK-NEXT: v=2
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] iterator advance: 0 += 2
+// CHECK-NEXT: [D] iterator move assign
+// CHECK-NEXT: [D] iterator deref: 2
+// CHECK-NEXT: vv=2
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: done
+// CHECK-NEXT: [D] iterator dtor
+// CHECK-NEXT: [C] iterator dtor
+// CHECK-NEXT: [D] dtor
+// CHECK-NEXT: [C] dtor
diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp
new file mode 100644
index 0000000000000..e9f76713fe3e0
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp
@@ -0,0 +1,208 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdarg>
+#include <cstdio>
+#include <vector>
+
+// Test fixture: a range-like object that logs every special member function
+// and every iterator operation to stdout, prefixed with "[<name>] ". The
+// expected-output lines at the end of this file pin the exact call order.
+struct Reporter {
+ // Label printed in front of every message emitted by this object and by the
+ // iterators it hands out.
+ const char *name;
+
+ Reporter(const char *name) : name(name) { print("ctor"); }
+
+ Reporter() : name("<anon>") { print("ctor"); }
+
+ Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+ Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+ ~Reporter() { print("dtor"); }
+
+ const Reporter &operator=(const Reporter &that) {
+ print("copy assign");
+ this->name = that.name;
+ return *this;
+ }
+
+ const Reporter &operator=(Reporter &&that) {
+ print("move assign");
+ this->name = that.name;
+ return *this;
+ }
+
+ // Iterator over the three logical elements 0, 1, 2. Internally 'pos' counts
+ // DOWN from 2 (begin) to -1 (end); every message reports the logical index
+ // '2 - pos' instead of the raw position. Only operator== is provided here;
+ // there is no user-declared operator!=.
+ struct Iterator {
+ // Reporter whose print() is used for logging.
+ const Reporter *owner;
+ // Raw position: 2 at begin(), -1 at end().
+ int pos;
+
+ // The only constructor that does not log anything.
+ Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+ Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+ owner->print("iterator copy ctor");
+ }
+
+ Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+ owner->print("iterator move ctor");
+ }
+
+ ~Iterator() { owner->print("iterator dtor"); }
+
+ // Logs through the current owner before taking over that.owner.
+ const Iterator &operator=(const Iterator &that) {
+ owner->print("iterator copy assign");
+ this->owner = that.owner;
+ this->pos = that.pos;
+ return *this;
+ }
+
+ // Logs through the current owner before taking over that.owner.
+ const Iterator &operator=(Iterator &&that) {
+ owner->print("iterator move assign");
+ this->owner = that.owner;
+ this->pos = that.pos;
+ return *this;
+ }
+
+ bool operator==(const Iterator &that) const {
+ owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+ return this->pos == that.pos;
+ }
+
+ // Advancing moves to the next logical element, i.e. decrements 'pos'.
+ Iterator &operator++() {
+ owner->print("iterator prefix ++");
+ pos -= 1;
+ return *this;
+ }
+
+ // Copies *this (which logs a copy construction) before advancing.
+ Iterator operator++(int) {
+ owner->print("iterator postfix ++");
+ auto result = *this;
+ pos -= 1;
+ return result;
+ }
+
+ // Dereferencing yields the logical index itself.
+ int operator*() const {
+ int result = 2 - pos;
+ owner->print("iterator deref: %i", result);
+ return result;
+ }
+
+ // Distance in logical-index units; the int result is converted to size_t
+ // on return.
+ size_t operator-(const Iterator &that) const {
+ int result = (2 - this->pos) - (2 - that.pos);
+ owner->print("iterator distance: %d", result);
+ return result;
+ }
+
+ // Returns a new iterator 'steps' logical elements further on; *this is
+ // left unchanged.
+ Iterator operator+(int steps) const {
+ owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+ return Iterator(owner, pos - steps);
+ }
+
+ // Forwards a plain message to the owning Reporter's logger.
+ void print(const char *msg) const { owner->print(msg); }
+ };
+
+ // Iterator to logical element 0 (raw position 2).
+ Iterator begin() const {
+ print("begin()");
+ return Iterator(this, 2);
+ }
+
+ // Past-the-end iterator (raw position -1, logical index 3).
+ Iterator end() const {
+ print("end()");
+ return Iterator(this, -1);
+ }
+
+ // printf-style logger: writes "[<name>] ", the formatted message and a
+ // newline to stdout.
+ void print(const char *msg, ...) const {
+ va_list args;
+ va_start(args, msg);
+ printf("[%s] ", name);
+ vprintf(msg, args);
+ printf("\n");
+ va_end(args);
+ }
+};
+
+// Applies 'parallel for collapse(2)' to an outer integer loop whose body is a
+// fused loop sequence: a range-based for loop with an init-statement over a
+// temporary Reporter, followed by a plain integer loop of three iterations.
+// NOTE(review): num_threads(1) is presumably there to keep the printed order
+// deterministic for the expected-output lines below.
+int main() {
+ printf("do\n");
+#pragma omp parallel for collapse(2) num_threads(1)
+ for (int i = 0; i < 3; ++i)
+#pragma omp fuse
+ {
+ for (Reporter c{"init-stmt"}; auto &&v : Reporter("range"))
+ printf("i=%d v=%d\n", i, v);
+ for (int vv = 0; vv < 3; ++vv)
+ printf("i=%d vv=%d\n", i, vv);
+ }
+ printf("done\n");
+ return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: [init-stmt] ctor
+// CHECK-NEXT: [range] ctor
+// CHECK-NEXT: [range] end()
+// CHECK-NEXT: [range] begin()
+// CHECK-NEXT: [range] begin()
+// CHECK-NEXT: [range] iterator distance: 3
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: i=0 v=0
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=0 vv=0
+// CHECK-NEXT: [range] iterator advance: 0 += 1
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: i=0 v=1
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=0 vv=1
+// CHECK-NEXT: [range] iterator advance: 0 += 2
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: i=0 v=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=0 vv=2
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: i=1 v=0
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=1 vv=0
+// CHECK-NEXT: [range] iterator advance: 0 += 1
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: i=1 v=1
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=1 vv=1
+// CHECK-NEXT: [range] iterator advance: 0 += 2
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: i=1 v=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=1 vv=2
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: i=2 v=0
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=2 vv=0
+// CHECK-NEXT: [range] iterator advance: 0 += 1
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: i=2 v=1
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=2 vv=1
+// CHECK-NEXT: [range] iterator advance: 0 += 2
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: i=2 v=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: i=2 vv=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] dtor
+// CHECK-NEXT: [init-stmt] dtor
+// CHECK-NEXT: done
+
diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c
new file mode 100644
index 0000000000000..272908e72c429
--- /dev/null
+++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c
@@ -0,0 +1,45 @@
+// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdlib>
+#include <cstdio>
+
+// Applies 'parallel for collapse(2)' to an outer loop over 'i' whose body is a
+// fused sequence of two three-iteration loops ('j' and 'k'). The expected
+// output below alternates "j" and "k" lines for each value of 'i'.
+// NOTE(review): num_threads(1) is presumably there to keep the printed order
+// deterministic.
+int main() {
+ printf("do\n");
+#pragma omp parallel for collapse(2) num_threads(1)
+ for (int i = 0; i < 3; ++i)
+#pragma omp fuse
+ {
+ for (int j = 0; j < 3; ++j)
+ printf("i=%d j=%d\n", i, j);
+ for (int k = 0; k < 3; ++k)
+ printf("i=%d k=%d\n", i, k);
+ }
+ printf("done\n");
+ return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK: i=0 j=0
+// CHECK-NEXT: i=0 k=0
+// CHECK-NEXT: i=0 j=1
+// CHECK-NEXT: i=0 k=1
+// CHECK-NEXT: i=0 j=2
+// CHECK-NEXT: i=0 k=2
+// CHECK-NEXT: i=1 j=0
+// CHECK-NEXT: i=1 k=0
+// CHECK-NEXT: i=1 j=1
+// CHECK-NEXT: i=1 k=1
+// CHECK-NEXT: i=1 j=2
+// CHECK-NEXT: i=1 k=2
+// CHECK-NEXT: i=2 j=0
+// CHECK-NEXT: i=2 k=0
+// CHECK-NEXT: i=2 j=1
+// CHECK-NEXT: i=2 k=1
+// CHECK-NEXT: i=2 j=2
+// CHECK-NEXT: i=2 k=2
+// CHECK-NEXT: done
>From 044ca734221825ed05fbf8372af3cbe264a1cc3c Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:28:04 +0000
Subject: [PATCH 2/7] Add looprange clause
---
clang/include/clang/AST/OpenMPClause.h | 100 ++++++
clang/include/clang/AST/RecursiveASTVisitor.h | 8 +
clang/include/clang/AST/StmtOpenMP.h | 18 +-
.../clang/Basic/DiagnosticSemaKinds.td | 5 +
clang/include/clang/Parse/Parser.h | 3 +
clang/include/clang/Sema/SemaOpenMP.h | 6 +
clang/lib/AST/OpenMPClause.cpp | 35 ++
clang/lib/AST/StmtOpenMP.cpp | 7 +-
clang/lib/AST/StmtProfile.cpp | 7 +
clang/lib/Basic/OpenMPKinds.cpp | 2 +
clang/lib/Parse/ParseOpenMP.cpp | 36 ++
clang/lib/Sema/SemaOpenMP.cpp | 155 +++++++--
clang/lib/Sema/TreeTransform.h | 33 ++
clang/lib/Serialization/ASTReader.cpp | 11 +
clang/lib/Serialization/ASTReaderStmt.cpp | 4 +-
clang/lib/Serialization/ASTWriter.cpp | 8 +
clang/test/OpenMP/fuse_ast_print.cpp | 67 ++++
clang/test/OpenMP/fuse_codegen.cpp | 320 +++++++++++++++++-
clang/test/OpenMP/fuse_messages.cpp | 112 +++++-
clang/tools/libclang/CIndex.cpp | 5 +
llvm/include/llvm/Frontend/OpenMP/ClauseT.h | 16 +-
llvm/include/llvm/Frontend/OpenMP/OMP.td | 6 +
22 files changed, 921 insertions(+), 43 deletions(-)
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index 572e62249b46f..b9c7b2771c95c 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -1151,6 +1151,106 @@ class OMPFullClause final : public OMPNoChildClause<llvm::omp::OMPC_full> {
static OMPFullClause *CreateEmpty(const ASTContext &C);
};
+/// This class represents the 'looprange' clause in the
+/// '#pragma omp fuse' directive. The clause takes two arguments, 'first' and
+/// 'count'.
+///
+/// \code {c}
+/// #pragma omp fuse looprange(1,2)
+/// {
+///   for(int i = 0; i < 64; ++i) { ... }
+///   for(int j = 0; j < 256; j+=2) { ... }
+///   for(int k = 127; k >= 0; --k) { ... }
+/// }
+/// \endcode
+class OMPLoopRangeClause final : public OMPClause {
+  friend class OMPClauseReader;
+
+  /// Build an empty clause; Create() and the setters below fill it in.
+  explicit OMPLoopRangeClause()
+      : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {}
+
+  /// Location of '('
+  SourceLocation LParenLoc;
+
+  /// Location of 'first'
+  SourceLocation FirstLoc;
+
+  /// Location of 'count'
+  SourceLocation CountLoc;
+
+  /// Positions of the clause arguments inside \c Args.
+  enum { FirstExpr, CountExpr, NumArgs };
+
+  /// Exprs associated with the 'first' and 'count' arguments. Both live in a
+  /// single array so that children() can return one contiguous range without
+  /// doing pointer arithmetic across two unrelated data members.
+  Stmt *Args[NumArgs] = {nullptr, nullptr};
+
+  /// Set 'first'
+  void setFirst(Expr *First) { Args[FirstExpr] = First; }
+
+  /// Set 'count'
+  void setCount(Expr *Count) { Args[CountExpr] = Count; }
+
+  /// Set location of '('.
+  void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
+  /// Set location of 'first' argument
+  void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; }
+
+  /// Set location of 'count' argument
+  void setCountLoc(SourceLocation Loc) { CountLoc = Loc; }
+
+public:
+  /// Build an AST node for a 'looprange' clause
+  ///
+  /// \param C         Context of the AST.
+  /// \param StartLoc  Starting location of the clause.
+  /// \param LParenLoc Location of '('.
+  /// \param FirstLoc  Location of the 'first' argument.
+  /// \param CountLoc  Location of the 'count' argument.
+  /// \param EndLoc    Ending location of the clause.
+  /// \param First     Expr associated with the 'first' argument.
+  /// \param Count     Expr associated with the 'count' argument.
+  static OMPLoopRangeClause *
+  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
+         SourceLocation FirstLoc, SourceLocation CountLoc,
+         SourceLocation EndLoc, Expr *First, Expr *Count);
+
+  /// Build an empty 'looprange' node for deserialization
+  ///
+  /// \param C Context of the AST.
+  static OMPLoopRangeClause *CreateEmpty(const ASTContext &C);
+
+  /// Returns the location of '('
+  SourceLocation getLParenLoc() const { return LParenLoc; }
+
+  /// Returns the location of 'first'
+  SourceLocation getFirstLoc() const { return FirstLoc; }
+
+  /// Returns the location of 'count'
+  SourceLocation getCountLoc() const { return CountLoc; }
+
+  /// Returns the argument 'first' or nullptr if not set
+  Expr *getFirst() const { return cast_or_null<Expr>(Args[FirstExpr]); }
+
+  /// Returns the argument 'count' or nullptr if not set
+  Expr *getCount() const { return cast_or_null<Expr>(Args[CountExpr]); }
+
+  /// Both argument expressions, 'first' followed by 'count'.
+  child_range children() { return child_range(Args, Args + NumArgs); }
+
+  const_child_range children() const {
+    auto Children = const_cast<OMPLoopRangeClause *>(this)->children();
+    return const_child_range(Children.begin(), Children.end());
+  }
+
+  /// Always an empty range.
+  child_range used_children() {
+    return child_range(child_iterator(), child_iterator());
+  }
+  const_child_range used_children() const {
+    return const_child_range(const_child_iterator(), const_child_iterator());
+  }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == llvm::omp::OMPC_looprange;
+  }
+};
+
+
/// Representation of the 'partial' clause of the '#pragma omp unroll'
/// directive.
///
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index e712a47f1639c..fbc93796ab46a 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3398,6 +3398,14 @@ bool RecursiveASTVisitor<Derived>::VisitOMPFullClause(OMPFullClause *C) {
return true;
}
+// Traverses the two argument expressions of a 'looprange' clause, 'first'
+// before 'count'.
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPLoopRangeClause(
+ OMPLoopRangeClause *C) {
+ TRY_TO(TraverseStmt(C->getFirst()));
+ TRY_TO(TraverseStmt(C->getCount()));
+ return true;
+}
+
+
template <typename Derived>
bool RecursiveASTVisitor<Derived>::VisitOMPPartialClause(OMPPartialClause *C) {
TRY_TO(TraverseStmt(C->getFactor()));
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index dc6f797e24ab8..85bde292ca748 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -5572,7 +5572,9 @@ class OMPTileDirective final : public OMPLoopTransformationDirective {
: OMPLoopTransformationDirective(OMPTileDirectiveClass,
llvm::omp::OMPD_tile, StartLoc, EndLoc,
NumLoops) {
+ // Tiling doubles the original number of loops
setNumGeneratedLoops(2 * NumLoops);
+ // Produces a single top-level canonical loop nest
setNumGeneratedLoopNests(1);
}
@@ -5803,9 +5805,9 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective {
: OMPLoopTransformationDirective(OMPReverseDirectiveClass,
llvm::omp::OMPD_reverse, StartLoc,
EndLoc, 1) {
-
- setNumGeneratedLoopNests(1);
+ // Reverse produces a single top-level canonical loop nest
setNumGeneratedLoops(1);
+ setNumGeneratedLoopNests(1);
}
void setPreInits(Stmt *PreInits) {
@@ -5873,6 +5875,8 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
: OMPLoopTransformationDirective(OMPInterchangeDirectiveClass,
llvm::omp::OMPD_interchange, StartLoc,
EndLoc, NumLoops) {
+ // Interchange produces a single top-level canonical loop
+ // nest, with the exact same amount of total loops
setNumGeneratedLoops(NumLoops);
setNumGeneratedLoopNests(1);
}
@@ -5950,11 +5954,7 @@ class OMPFuseDirective final : public OMPLoopTransformationDirective {
unsigned NumLoops)
: OMPLoopTransformationDirective(OMPFuseDirectiveClass,
llvm::omp::OMPD_fuse, StartLoc, EndLoc,
- NumLoops) {
- setNumGeneratedLoops(1);
- // TODO: After implementing the looprange clause, change this logic
- setNumGeneratedLoopNests(1);
- }
+ NumLoops) {}
void setPreInits(Stmt *PreInits) {
Data->getChildren()[PreInitsOffset] = PreInits;
@@ -5990,8 +5990,10 @@ class OMPFuseDirective final : public OMPLoopTransformationDirective {
/// \param C Context of the AST
/// \param NumClauses Number of clauses to allocate
/// \param NumLoops Number of associated loops to allocate
+ /// \param NumLoopNests Number of top level loops to allocate
static OMPFuseDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses,
- unsigned NumLoops);
+ unsigned NumLoops,
+ unsigned NumLoopNests);
/// Gets the associated loops after the transformation. This is the de-sugared
/// replacement or nulltpr in dependent contexts.
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 640db20f82e0b..ecfb0c83a3851 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11524,6 +11524,11 @@ def err_omp_not_a_loop_sequence : Error <
"statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">;
def err_omp_empty_loop_sequence : Error <
"loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">;
+def err_omp_invalid_looprange : Error <
+ "loop range in '#pragma omp %0' exceeds the number of available loops: "
+ "range end '%1' is greater than the total number of loops '%2'">;
+def warn_omp_redundant_fusion : Warning <
+ "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">;
def err_omp_not_for : Error<
"%select{statement after '#pragma omp %1' must be a for loop|"
"expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">;
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index e0b8850493b49..0c4c4fc4ba417 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -3622,6 +3622,9 @@ class Parser : public CodeCompletionHandler {
OpenMPClauseKind Kind,
bool ParseOnly);
+ /// Parses the 'looprange' clause of a '#pragma omp fuse' directive.
+ OMPClause *ParseOpenMPLoopRangeClause();
+
/// Parses the 'sizes' clause of a '#pragma omp tile' directive.
OMPClause *ParseOpenMPSizesClause();
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index 8d78c2197c89d..f4a075e54cebe 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -921,6 +921,12 @@ class SemaOpenMP : public SemaBase {
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc);
+
+ /// Called on well-formed 'looprange' clause after parsing its arguments.
+ OMPClause *
+ ActOnOpenMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation FirstLoc,
+ SourceLocation CountLoc, SourceLocation EndLoc);
/// Called on well-formed 'ordered' clause.
OMPClause *
ActOnOpenMPOrderedClause(SourceLocation StartLoc, SourceLocation EndLoc,
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 2226791a70b6e..e3dbc00ecf9e5 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -1024,6 +1024,26 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) {
return new (C) OMPPartialClause();
}
+// Builds a fully initialised 'looprange' clause node.
+//
+// The parameter order (StartLoc, LParenLoc, FirstLoc, CountLoc, EndLoc) must
+// match the declaration in OpenMPClause.h. All five are SourceLocations, so a
+// mismatch compiles silently but stores the locations in the wrong fields.
+OMPLoopRangeClause *
+OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc,
+                           SourceLocation LParenLoc, SourceLocation FirstLoc,
+                           SourceLocation CountLoc, SourceLocation EndLoc,
+                           Expr *First, Expr *Count) {
+  OMPLoopRangeClause *Clause = CreateEmpty(C);
+  Clause->setLocStart(StartLoc);
+  Clause->setLParenLoc(LParenLoc);
+  Clause->setFirstLoc(FirstLoc);
+  Clause->setCountLoc(CountLoc);
+  Clause->setLocEnd(EndLoc);
+  Clause->setFirst(First);
+  Clause->setCount(Count);
+  return Clause;
+}
+
+// Allocates a default-constructed 'looprange' clause in the ASTContext; the
+// caller fills in the locations and arguments afterwards.
+OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) {
+ return new (C) OMPLoopRangeClause();
+}
+
OMPAllocateClause *OMPAllocateClause::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
Expr *Allocator, Expr *Alignment, SourceLocation ColonLoc,
@@ -1888,6 +1908,21 @@ void OMPClausePrinter::VisitOMPPartialClause(OMPPartialClause *Node) {
}
}
+// Prints the clause as "looprange(<first>,<count>)". The keyword is always
+// written; the parenthesised argument list is only written when both
+// arguments are present.
+void OMPClausePrinter::VisitOMPLoopRangeClause(OMPLoopRangeClause *Node) {
+  OS << "looprange";
+
+  Expr *FirstArg = Node->getFirst();
+  Expr *CountArg = Node->getCount();
+  if (!FirstArg || !CountArg)
+    return;
+
+  OS << "(";
+  FirstArg->printPretty(OS, nullptr, Policy, 0);
+  OS << ",";
+  CountArg->printPretty(OS, nullptr, Policy, 0);
+  OS << ")";
+}
+
+
void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) {
OS << "allocator(";
Node->getAllocator()->printPretty(OS, nullptr, Policy, 0);
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index f050e9063f1fc..6a2ac64f4e40b 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -524,10 +524,13 @@ OMPFuseDirective *OMPFuseDirective::Create(
OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
- unsigned NumLoops) {
- return createEmptyDirective<OMPFuseDirective>(
+ unsigned NumLoops,
+ unsigned NumLoopNests) {
+ OMPFuseDirective *Dir = createEmptyDirective<OMPFuseDirective>(
C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1,
SourceLocation(), SourceLocation(), NumLoops);
+ Dir->setNumGeneratedLoopNests(NumLoopNests);
+ return Dir;
}
OMPForSimdDirective *
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 933ad19b7a8ef..34f479b4b0b8a 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -511,6 +511,13 @@ void OMPClauseProfiler::VisitOMPPartialClause(const OMPPartialClause *C) {
Profiler->VisitExpr(Factor);
}
+// Profiles whichever of the clause arguments are present, 'first' before
+// 'count'.
+void OMPClauseProfiler::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) {
+  const Expr *FirstArg = C->getFirst();
+  if (FirstArg)
+    Profiler->VisitExpr(FirstArg);
+
+  const Expr *CountArg = C->getCount();
+  if (CountArg)
+    Profiler->VisitExpr(CountArg);
+}
+
+
void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) {
if (C->getAllocator())
Profiler->VisitStmt(C->getAllocator());
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index e18867e3c0281..3c62b61f3a438 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -248,6 +248,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str,
case OMPC_affinity:
case OMPC_when:
case OMPC_append_args:
+ case OMPC_looprange:
break;
default:
break;
@@ -583,6 +584,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind,
case OMPC_affinity:
case OMPC_when:
case OMPC_append_args:
+ case OMPC_looprange:
break;
default:
break;
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 8d8698e61216f..6643572b878f2 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -3116,6 +3116,39 @@ OMPClause *Parser::ParseOpenMPSizesClause() {
OpenLoc, CloseLoc);
}
+// Parses "looprange '(' first ',' count ')'", where both arguments are
+// constant expressions, and hands the result to Sema. Returns nullptr when
+// the opening parenthesis is missing or either argument fails to parse.
+OMPClause *Parser::ParseOpenMPLoopRangeClause() {
+  // Consume the 'looprange' keyword itself.
+  const SourceLocation KeywordLoc = ConsumeToken();
+
+  BalancedDelimiterTracker Parens(*this, tok::l_paren,
+                                  tok::annot_pragma_openmp_end);
+  if (Parens.consumeOpen()) {
+    Diag(Tok, diag::err_expected) << tok::l_paren;
+    return nullptr;
+  }
+
+  const SourceLocation FirstLoc = Tok.getLocation();
+  ExprResult FirstArg = ParseConstantExpression();
+  if (!FirstArg.isUsable()) {
+    Parens.skipToEnd();
+    return nullptr;
+  }
+
+  // NOTE(review): the result of ExpectAndConsume() is ignored, so parsing
+  // continues with the 'count' argument even if the ',' was missing.
+  ExpectAndConsume(tok::comma);
+
+  const SourceLocation CountLoc = Tok.getLocation();
+  ExprResult CountArg = ParseConstantExpression();
+  if (!CountArg.isUsable()) {
+    Parens.skipToEnd();
+    return nullptr;
+  }
+
+  Parens.consumeClose();
+
+  return Actions.OpenMP().ActOnOpenMPLoopRangeClause(
+      FirstArg.get(), CountArg.get(), KeywordLoc, Parens.getOpenLocation(),
+      FirstLoc, CountLoc, Parens.getCloseLocation());
+}
+
+
OMPClause *Parser::ParseOpenMPPermutationClause() {
SourceLocation ClauseNameLoc, OpenLoc, CloseLoc;
SmallVector<Expr *> ArgExprs;
@@ -3545,6 +3578,9 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
}
Clause = ParseOpenMPClause(CKind, WrongDirective);
break;
+ case OMPC_looprange:
+ Clause = ParseOpenMPLoopRangeClause();
+ break;
default:
break;
}
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index c9885518217f3..8cd56d1af6ac8 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -14257,7 +14257,6 @@ bool SemaOpenMP::checkTransformableLoopSequence(
// and tries to match the input AST to the canonical loop sequence grammar
// structure
- auto NLCV = NestedLoopCounterVisitor();
// Helper functions to validate canonical loop sequence grammar is valid
auto isLoopSequenceDerivation = [](auto *Child) {
return isa<ForStmt>(Child) || isa<CXXForRangeStmt>(Child) ||
@@ -14360,7 +14359,7 @@ bool SemaOpenMP::checkTransformableLoopSequence(
// Modularized code for handling regular canonical loops
auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits,
- &LoopSeqSize, &NumLoops, Kind, &TmpDSA, &NLCV,
+ &LoopSeqSize, &NumLoops, Kind, &TmpDSA,
this](Stmt *Child) {
OriginalInits.emplace_back();
LoopHelpers.emplace_back();
@@ -14373,8 +14372,11 @@ bool SemaOpenMP::checkTransformableLoopSequence(
<< getOpenMPDirectiveName(Kind);
return false;
}
+
storeLoopStatements(Child);
- NumLoops += NLCV.TraverseStmt(Child);
+ auto NLCV = NestedLoopCounterVisitor();
+ NLCV.TraverseStmt(Child);
+ NumLoops += NLCV.getNestedLoopCount();
return true;
};
@@ -15686,6 +15688,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
Stmt *AStmt,
SourceLocation StartLoc,
SourceLocation EndLoc) {
+
ASTContext &Context = getASTContext();
DeclContext *CurrContext = SemaRef.CurContext;
Scope *CurScope = SemaRef.getCurScope();
@@ -15702,7 +15705,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
SmallVector<SmallVector<Stmt *, 0>> OriginalInits;
unsigned NumLoops;
- // TODO: Support looprange clause using LoopSeqSize
unsigned LoopSeqSize;
if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops,
LoopHelpers, LoopStmts, OriginalInits,
@@ -15711,10 +15713,67 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
}
// Defer transformation in dependent contexts
+ // The NumLoopNests argument is set to a placeholder (0)
+ // because a dependent context could prevent determining its true value
if (CurrContext->isDependentContext()) {
return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
- NumLoops, 1, AStmt, nullptr, nullptr);
+ NumLoops, 0, AStmt, nullptr, nullptr);
}
+
+ // Handle clauses, which can be any of the following: [looprange, apply]
+ const OMPLoopRangeClause *LRC =
+ OMPExecutableDirective::getSingleClause<OMPLoopRangeClause>(Clauses);
+
+ // The clause arguments are invalidated if any error arises
+ // such as non-constant or non-positive arguments
+ if (LRC && (!LRC->getFirst() || !LRC->getCount()))
+ return StmtError();
+
+ // Delayed semantic check of LoopRange constraint
+ // Evaluates the loop range arguments and returns the first and count values
+ auto EvaluateLoopRangeArguments = [&Context](Expr *First, Expr *Count,
+ uint64_t &FirstVal,
+ uint64_t &CountVal) {
+ llvm::APSInt FirstInt = First->EvaluateKnownConstInt(Context);
+ llvm::APSInt CountInt = Count->EvaluateKnownConstInt(Context);
+ FirstVal = FirstInt.getZExtValue();
+ CountVal = CountInt.getZExtValue();
+ };
+
+ // Checks if the loop range is valid
+ auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal,
+ unsigned NumLoops) -> bool {
+ return FirstVal + CountVal - 1 <= NumLoops;
+ };
+ uint64_t FirstVal = 1, CountVal = 0, LastVal = LoopSeqSize;
+
+ if (LRC) {
+ EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal,
+ CountVal);
+ if (CountVal == 1)
+ SemaRef.Diag(LRC->getCountLoc(), diag::warn_omp_redundant_fusion)
+ << getOpenMPDirectiveName(OMPD_fuse);
+
+ if (!ValidLoopRange(FirstVal, CountVal, LoopSeqSize)) {
+ SemaRef.Diag(LRC->getFirstLoc(), diag::err_omp_invalid_looprange)
+ << getOpenMPDirectiveName(OMPD_fuse) << (FirstVal + CountVal - 1)
+ << LoopSeqSize;
+ return StmtError();
+ }
+
+ LastVal = FirstVal + CountVal - 1;
+ }
+
+ // Complete fusion generates a single canonical loop nest
+ // However looprange clause generates several loop nests
+ unsigned NumLoopNests = LRC ? LoopSeqSize - CountVal + 1 : 1;
+
+ // Emit a warning for redundant loop fusion when the sequence contains only
+ // one loop.
+ if (LoopSeqSize == 1)
+ SemaRef.Diag(AStmt->getBeginLoc(), diag::warn_omp_redundant_fusion)
+ << getOpenMPDirectiveName(OMPD_fuse);
+
assert(LoopHelpers.size() == LoopSeqSize &&
"Expecting loop iteration space dimensionality to match number of "
"affected loops");
@@ -15728,8 +15787,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
SmallVector<Stmt *> PreInits;
// Select the type with the largest bit width among all induction variables
- QualType IVType = LoopHelpers[0].IterationVarRef->getType();
- for (unsigned int I = 1; I < LoopSeqSize; ++I) {
+ QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType();
+ for (unsigned int I = FirstVal; I < LastVal; ++I) {
QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType();
if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) {
IVType = CurrentIVType;
@@ -15778,20 +15837,21 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
// Process each single loop to generate and collect declarations
// and statements for all helper expressions
- for (unsigned int I = 0; I < LoopSeqSize; ++I) {
+ for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
PreInits);
- auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", I);
- auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", I);
- auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", I);
+ auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", J);
+ auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", J);
+ auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", J);
auto [NIVD, NIDStmt] =
- CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", I, true);
+ CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", J, true);
auto [IVVD, IVDStmt] =
- CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", I);
+ CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", J);
if (!LBVD || !STVD || !NIVD || !IVVD)
- return StmtError();
+ assert(LBVD && STVD && NIVD && IVVD &&
+ "OpenMP Fuse Helper variables creation failed");
UBVarDecls.push_back(UBVD);
LBVarDecls.push_back(LBVD);
@@ -15866,8 +15926,9 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
// omp.fuse.max = max(omp.temp1, omp.temp0)
ExprResult MaxExpr;
- for (unsigned I = 0; I < LoopSeqSize; ++I) {
- DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[I]);
+ // I indexes the original loop sequence; J indexes the fused-range helpers
+ for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
+ DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[J]);
QualType NITy = NIRef->getType();
if (MaxExpr.isUnset()) {
@@ -15875,7 +15936,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
MaxExpr = NIRef;
} else {
// Create a new acummulator variable t_i = MaxExpr
- std::string TempName = (Twine(".omp.temp.") + Twine(I)).str();
+ std::string TempName = (Twine(".omp.temp.") + Twine(J)).str();
VarDecl *TempDecl =
buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr);
TempDecl->setInit(MaxExpr.get());
@@ -15898,7 +15959,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
if (!Comparison.isUsable())
return StmtError();
- DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[I]);
+ DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[J]);
// Update MaxExpr using a conditional expression to hold the max value
MaxExpr = new (Context) ConditionalOperator(
Comparison.get(), SourceLocation(), TempRef2, SourceLocation(),
@@ -15951,23 +16012,21 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
CompoundStmt *FusedBody = nullptr;
SmallVector<Stmt *, 4> FusedBodyStmts;
- for (unsigned I = 0; I < LoopSeqSize; ++I) {
-
+ for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
// Assingment of the original sub-loop index to compute the logical index
// IV_k = LB_k + omp.fuse.index * ST_k
-
ExprResult IdxExpr =
SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul,
- MakeVarDeclRef(STVarDecls[I]), MakeIVRef());
+ MakeVarDeclRef(STVarDecls[J]), MakeIVRef());
if (!IdxExpr.isUsable())
return StmtError();
IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add,
- MakeVarDeclRef(LBVarDecls[I]), IdxExpr.get());
+ MakeVarDeclRef(LBVarDecls[J]), IdxExpr.get());
if (!IdxExpr.isUsable())
return StmtError();
IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign,
- MakeVarDeclRef(IVVarDecls[I]), IdxExpr.get());
+ MakeVarDeclRef(IVVarDecls[J]), IdxExpr.get());
if (!IdxExpr.isUsable())
return StmtError();
@@ -15982,7 +16041,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
Stmt *Body = (isa<ForStmt>(LoopStmts[I]))
? cast<ForStmt>(LoopStmts[I])->getBody()
: cast<CXXForRangeStmt>(LoopStmts[I])->getBody();
-
BodyStmts.push_back(Body);
CompoundStmt *CombinedBody =
@@ -15990,7 +16048,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
SourceLocation(), SourceLocation());
ExprResult Condition =
SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(),
- MakeVarDeclRef(NIVarDecls[I]));
+ MakeVarDeclRef(NIVarDecls[J]));
if (!Condition.isUsable())
return StmtError();
@@ -16011,8 +16069,26 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(),
IncrExpr.get()->getEndLoc());
+ // In the case of looprange, the result of fuse won't simply
+ // be a single loop (ForStmt), but rather a loop sequence
+ // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop
+ // and the post-fusion loops, preserving its original order.
+ Stmt *FusionStmt = FusedForStmt;
+ if (LRC) {
+ SmallVector<Stmt *, 4> FinalLoops;
+ // Gather all the pre-fusion loops
+ for (unsigned I = 0; I < FirstVal - 1; ++I)
+ FinalLoops.push_back(LoopStmts[I]);
+ // Gather the fused loop
+ FinalLoops.push_back(FusedForStmt);
+ // Gather all the post-fusion loops
+ for (unsigned I = FirstVal + CountVal - 1; I < LoopSeqSize; ++I)
+ FinalLoops.push_back(LoopStmts[I]);
+ FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(),
+ SourceLocation(), SourceLocation());
+ }
return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops,
- 1, AStmt, FusedForStmt,
+ NumLoopNests, AStmt, FusionStmt,
buildPreInits(Context, PreInits));
}
@@ -17128,6 +17204,31 @@ OMPClause *SemaOpenMP::ActOnOpenMPPartialClause(Expr *FactorExpr,
FactorExpr);
}
+OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause(
+    Expr *First, Expr *Count, SourceLocation StartLoc, SourceLocation LParenLoc,
+    SourceLocation FirstLoc, SourceLocation CountLoc, SourceLocation EndLoc) {
+  // OpenMP [6.0, Restrictions]
+  // Both looprange arguments must be integer constant expressions with a
+  // positive value. An argument that fails verification is dropped (set to
+  // null) instead of aborting here, so the clause is still built and the
+  // associated directive can bail out when it sees the missing argument.
+  // The arguments are verified in clause order: first, then count.
+  if (VerifyPositiveIntegerConstantInClause(First, OMPC_looprange).isInvalid())
+    First = nullptr;
+
+  if (VerifyPositiveIntegerConstantInClause(Count, OMPC_looprange).isInvalid())
+    Count = nullptr;
+
+  // OpenMP [6.0, Restrictions]
+  // 'first + count - 1' must not exceed the length of the associated
+  // canonical loop sequence. That length is not available yet (the loop
+  // sequence is parsed and analyzed later), so the range check is
+  // deferred to the directive.
+  ASTContext &Ctx = getASTContext();
+  return OMPLoopRangeClause::Create(Ctx, StartLoc, LParenLoc, FirstLoc,
+                                    CountLoc, EndLoc, First, Count);
+}
+
OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 39082e06a5a0b..68b51bb3c19c5 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1775,6 +1775,14 @@ class TreeTransform {
LParenLoc, EndLoc);
}
+ /// Build a new OpenMP 'looprange' clause.
+ ///
+ /// By default, performs semantic analysis to build the new OpenMP clause
+ /// by forwarding all arguments to SemaOpenMP::ActOnOpenMPLoopRangeClause.
+ OMPClause *
+ RebuildOMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation FirstLoc,
+ SourceLocation CountLoc, SourceLocation EndLoc) {
+ return getSema().OpenMP().ActOnOpenMPLoopRangeClause(
+ First, Count, StartLoc, LParenLoc, FirstLoc, CountLoc, EndLoc);
+ }
+
/// Build a new OpenMP 'allocator' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
@@ -10566,6 +10574,31 @@ TreeTransform<Derived>::TransformOMPPartialClause(OMPPartialClause *C) {
C->getEndLoc());
}
+template <typename Derived>
+OMPClause *
+TreeTransform<Derived>::TransformOMPLoopRangeClause(OMPLoopRangeClause *C) {
+  // Transform both arguments; failure of either one invalidates the clause.
+  ExprResult FirstRes = getDerived().TransformExpr(C->getFirst());
+  if (FirstRes.isInvalid())
+    return nullptr;
+  ExprResult CountRes = getDerived().TransformExpr(C->getCount());
+  if (CountRes.isInvalid())
+    return nullptr;
+
+  // Reuse the existing clause when neither argument was changed by the
+  // transformation and the derived transform does not force a rebuild.
+  const bool Unchanged = FirstRes.get() == C->getFirst() &&
+                         CountRes.get() == C->getCount();
+  if (Unchanged && !getDerived().AlwaysRebuild())
+    return C;
+
+  // Otherwise build a fresh clause from the transformed arguments, keeping
+  // the source locations of the original clause.
+  return RebuildOMPLoopRangeClause(
+      FirstRes.get(), CountRes.get(), C->getBeginLoc(), C->getLParenLoc(),
+      C->getFirstLoc(), C->getCountLoc(), C->getEndLoc());
+}
+
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPCollapseClause(OMPCollapseClause *C) {
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index a17d6229ee3a1..b5aa729bf717b 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -11086,6 +11086,9 @@ OMPClause *OMPClauseReader::readClause() {
case llvm::omp::OMPC_partial:
C = OMPPartialClause::CreateEmpty(Context);
break;
+ case llvm::omp::OMPC_looprange:
+ C = OMPLoopRangeClause::CreateEmpty(Context);
+ break;
case llvm::omp::OMPC_allocator:
C = new (Context) OMPAllocatorClause();
break;
@@ -11487,6 +11490,14 @@ void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) {
C->setLParenLoc(Record.readSourceLocation());
}
+// Deserializes the fields of a 'looprange' clause. The read order (first,
+// count, then the '(' / first / count source locations) has to stay in sync
+// with the order used by OMPClauseWriter::VisitOMPLoopRangeClause.
+void OMPClauseReader::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) {
+ C->setFirst(Record.readSubExpr());
+ C->setCount(Record.readSubExpr());
+ C->setLParenLoc(Record.readSourceLocation());
+ C->setFirstLoc(Record.readSourceLocation());
+ C->setCountLoc(Record.readSourceLocation());
+}
+
void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
C->setAllocator(Record.readExpr());
C->setLParenLoc(Record.readSourceLocation());
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index aee052404874c..90a058629f19d 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -3621,7 +3621,9 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
case STMT_OMP_FUSE_DIRECTIVE: {
unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];
- S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops);
+ unsigned NumLoopNests = Record[ASTStmtReader::NumStmtFields + 2];
+ S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops,
+ NumLoopNests);
break;
}
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index cccf53de25882..33e1918f8fd91 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -7785,6 +7785,14 @@ void OMPClauseWriter::VisitOMPPartialClause(OMPPartialClause *C) {
Record.AddSourceLocation(C->getLParenLoc());
}
+// Serializes the fields of a 'looprange' clause: the first and count
+// expressions followed by the '(' / first / count source locations. The
+// emission order has to stay in sync with the order in which
+// OMPClauseReader::VisitOMPLoopRangeClause reads the fields back.
+void OMPClauseWriter::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) {
+ Record.AddStmt(C->getFirst());
+ Record.AddStmt(C->getCount());
+ Record.AddSourceLocation(C->getLParenLoc());
+ Record.AddSourceLocation(C->getFirstLoc());
+ Record.AddSourceLocation(C->getCountLoc());
+}
+
void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
Record.AddStmt(C->getAllocator());
Record.AddSourceLocation(C->getLParenLoc());
diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp
index 43ce815dab024..ac4f0d38a9c68 100644
--- a/clang/test/OpenMP/fuse_ast_print.cpp
+++ b/clang/test/OpenMP/fuse_ast_print.cpp
@@ -271,6 +271,73 @@ void foo7() {
}
+// Partial fusion with constant arguments: looprange(2,2) applied to a
+// sequence of three loops selects the second and third loop (j and k).
+// PRINT-LABEL: void foo8(
+// DUMP-LABEL: FunctionDecl {{.*}} foo8
+void foo8() {
+ // PRINT: #pragma omp fuse looprange(2,2)
+ // DUMP: OMPFuseDirective
+ // DUMP: OMPLooprangeClause
+ #pragma omp fuse looprange(2,2)
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+
+ }
+
+}
+
+// looprange whose arguments are the non-type template parameters F and C,
+// so the clause is printed and dumped in its template-dependent form.
+// The template is instantiated by tfoo9 below.
+//PRINT-LABEL: void foo9(
+//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo9
+//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} F
+//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} C
+template<int F, int C>
+void foo9() {
+ // PRINT: #pragma omp fuse looprange(F,C)
+ // DUMP: OMPFuseDirective
+ // DUMP: OMPLooprangeClause
+ #pragma omp fuse looprange(F,C)
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+
+ }
+}
+
+// Also test instantiating the template: foo9<1, 2> yields looprange(1,2),
+// i.e. a range that covers both loops of foo9's two-loop sequence.
+void tfoo9() {
+ foo9<1, 2>();
+}
+
diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp
index 6c1e21092da43..d9500bed3ce31 100644
--- a/clang/test/OpenMP/fuse_codegen.cpp
+++ b/clang/test/OpenMP/fuse_codegen.cpp
@@ -53,6 +53,18 @@ extern "C" void foo3() {
}
}
+// looprange(2,2) on a sequence of four loops: only the second and third
+// loop (j and k) are fused; the leading i-loop and the trailing range-based
+// for loop are emitted unchanged before and after the fused loop.
+extern "C" void foo4() {
+ double arr[256];
+
+ #pragma omp fuse looprange(2,2)
+ {
+ for(int i = 0; i < 128; ++i) body(i);
+ for(int j = 0; j < 256; j+=2) body(j);
+ for(int k = 0; k < 64; ++k) body(k);
+ for(int c = 42; auto &&v: arr) body(c,v);
+ }
+}
+
#endif
// CHECK1-LABEL: define dso_local void @body(
@@ -777,6 +789,157 @@ extern "C" void foo3() {
// CHECK1-NEXT: ret void
//
//
+// CHECK1-LABEL: define dso_local void @foo4(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK1-NEXT: store i32 63, ptr [[DOTOMP_UB1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128
+// CHECK1-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP6]])
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND2:.*]]
+// CHECK1: [[FOR_COND2]]:
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]]
+// CHECK1: [[FOR_BODY4]]:
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// CHECK1-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2
+// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP16]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+// CHECK1-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]]
+// CHECK1: [[IF_THEN9]]:
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]]
+// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1
+// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK1-NEXT: store i32 [[ADD13]], ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP23]])
+// CHECK1-NEXT: br label %[[IF_END14]]
+// CHECK1: [[IF_END14]]:
+// CHECK1-NEXT: br label %[[FOR_INC15:.*]]
+// CHECK1: [[FOR_INC15]]:
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1
+// CHECK1-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK1: [[FOR_END17]]:
+// CHECK1-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND19:.*]]
+// CHECK1: [[FOR_COND19]]:
+// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]]
+// CHECK1: [[FOR_BODY21]]:
+// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: store ptr [[TMP29]], ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP30]], double noundef [[TMP32]])
+// CHECK1-NEXT: br label %[[FOR_INC22:.*]]
+// CHECK1: [[FOR_INC22]]:
+// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1
+// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND19]]
+// CHECK1: [[FOR_END23]]:
+// CHECK1-NEXT: ret void
+//
+//
// CHECK2-LABEL: define dso_local void @body(
// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: [[ENTRY:.*:]]
@@ -1259,6 +1422,157 @@ extern "C" void foo3() {
// CHECK2-NEXT: ret void
//
//
+// CHECK2-LABEL: define dso_local void @foo4(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK2-NEXT: store i32 63, ptr [[DOTOMP_UB1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128
+// CHECK2-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP6]])
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND2:.*]]
+// CHECK2: [[FOR_COND2]]:
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
+// CHECK2-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]]
+// CHECK2: [[FOR_BODY4]]:
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// CHECK2-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]]
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2
+// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP16]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+// CHECK2-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]]
+// CHECK2: [[IF_THEN9]]:
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
+// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]]
+// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1
+// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK2-NEXT: store i32 [[ADD13]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP23]])
+// CHECK2-NEXT: br label %[[IF_END14]]
+// CHECK2: [[IF_END14]]:
+// CHECK2-NEXT: br label %[[FOR_INC15:.*]]
+// CHECK2: [[FOR_INC15]]:
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1
+// CHECK2-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK2: [[FOR_END17]]:
+// CHECK2-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND19:.*]]
+// CHECK2: [[FOR_COND19]]:
+// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]]
+// CHECK2: [[FOR_BODY21]]:
+// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: store ptr [[TMP29]], ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP30]], double noundef [[TMP32]])
+// CHECK2-NEXT: br label %[[FOR_INC22:.*]]
+// CHECK2: [[FOR_INC22]]:
+// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1
+// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND19]]
+// CHECK2: [[FOR_END23]]:
+// CHECK2-NEXT: ret void
+//
+//
// CHECK2-LABEL: define dso_local void @tfoo2(
// CHECK2-SAME: ) #[[ATTR0]] {
// CHECK2-NEXT: [[ENTRY:.*:]]
@@ -1494,7 +1808,7 @@ extern "C" void foo3() {
// CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK2: [[FOR_END]]:
// CHECK2-NEXT: ret void
//
@@ -1503,9 +1817,13 @@ extern "C" void foo3() {
// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"}
// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
+// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
//.
// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"}
// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
+// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
//.
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp
index 50dedfd2c0dc6..2a2491d008a0b 100644
--- a/clang/test/OpenMP/fuse_messages.cpp
+++ b/clang/test/OpenMP/fuse_messages.cpp
@@ -33,6 +33,8 @@ void func() {
{
for (int i = 0; i < 7; ++i)
;
+ for(int j = 0; j < 100; ++j);
+
}
@@ -41,6 +43,8 @@ void func() {
{
for (int i = 0; i < 7; ++i)
;
+ for(int j = 0; j < 100; ++j);
+
}
//expected-error at +4 {{loop after '#pragma omp fuse' is not in canonical form}}
@@ -50,6 +54,7 @@ void func() {
for(int i = 0; i < 10; i*=2) {
;
}
+ for(int j = 0; j < 100; ++j);
}
//expected-error at +2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}}
@@ -73,4 +78,109 @@ void func() {
for(unsigned int j = 0; j < 10; ++j);
for(long long k = 0; k < 100; ++k);
}
-}
\ No newline at end of file
+
+ //expected-warning at +2 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}}
+ #pragma omp fuse
+ {
+ for(int i = 0; i < 10; ++i);
+ }
+
+ //expected-warning at +1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}}
+ #pragma omp fuse looprange(1, 1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ }
+
+ //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+ #pragma omp fuse looprange(1, -1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ }
+
+ //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+ #pragma omp fuse looprange(1, 0)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ }
+
+ const int x = 1;
+ constexpr int y = 4;
+ //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than the total number of loops '3'}}
+ #pragma omp fuse looprange(x,y)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+
+ //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '420' is greater than the total number of loops '3'}}
+ #pragma omp fuse looprange(1,420)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+}
+
+// In a template context, but expression itself not instantiation-dependent
+template <typename T>
+static void templated_func() {
+
+ //expected-warning at +1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}}
+ #pragma omp fuse looprange(2,1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+
+ //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '5' is greater than the total number of loops '3'}}
+ #pragma omp fuse looprange(3,3)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+
+}
+
+template <int V>
+static void templated_func_value_dependent() {
+
+ //expected-warning at +1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}}
+ #pragma omp fuse looprange(V,1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+}
+
+template <typename T>
+static void templated_func_type_dependent() {
+ constexpr T s = 1;
+
+ //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+ #pragma omp fuse looprange(s,s-1)
+ {
+ for(int i = 0; i < 10; ++i);
+ for(int j = 0; j < 100; ++j);
+ for(int k = 0; k < 50; ++k);
+ }
+}
+
+
+void template_inst() {
+ // expected-note at +1 {{in instantiation of function template specialization 'templated_func<int>' requested here}}
+ templated_func<int>();
+ // expected-note at +1 {{in instantiation of function template specialization 'templated_func_value_dependent<1>' requested here}}
+ templated_func_value_dependent<1>();
+ // expected-note at +1 {{in instantiation of function template specialization 'templated_func_type_dependent<int>' requested here}}
+ templated_func_type_dependent<int>();
+
+}
+
+
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 80020763961fc..a8ca488a8dc0b 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2412,6 +2412,11 @@ void OMPClauseEnqueue::VisitOMPPartialClause(const OMPPartialClause *C) {
Visitor->AddStmt(C->getFactor());
}
+void OMPClauseEnqueue::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) {
+ Visitor->AddStmt(C->getFirst());
+ Visitor->AddStmt(C->getCount());
+}
+
void OMPClauseEnqueue::VisitOMPAllocatorClause(const OMPAllocatorClause *C) {
Visitor->AddStmt(C->getAllocator());
}
diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
index e0714e812e5cd..dd51274c1aaf5 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
@@ -1233,6 +1233,15 @@ struct WriteT {
using EmptyTrait = std::true_type;
};
+// V6: [6.4.7] Looprange clause
+// Tuple-style clause holding two values of type E.
+// NOTE(review): Clang's OMPLoopRangeClause exposes its two arguments as
+// getFirst() and getCount(); confirm whether `End` is meant to hold a loop
+// count rather than an end position.
+template <typename T, typename I, typename E> struct LoopRangeT {
+ using Begin = E;
+ using End = E;
+
+ using TupleTrait = std::true_type;
+ std::tuple<Begin, End> t;
+};
+
// ---
template <typename T, typename I, typename E>
@@ -1263,9 +1272,10 @@ using TupleClausesT =
DefaultmapT<T, I, E>, DeviceT<T, I, E>, DistScheduleT<T, I, E>,
DoacrossT<T, I, E>, FromT<T, I, E>, GrainsizeT<T, I, E>,
IfT<T, I, E>, InitT<T, I, E>, InReductionT<T, I, E>,
- LastprivateT<T, I, E>, LinearT<T, I, E>, MapT<T, I, E>,
- NumTasksT<T, I, E>, OrderT<T, I, E>, ReductionT<T, I, E>,
- ScheduleT<T, I, E>, TaskReductionT<T, I, E>, ToT<T, I, E>>;
+ LastprivateT<T, I, E>, LinearT<T, I, E>, LoopRangeT<T, I, E>,
+ MapT<T, I, E>, NumTasksT<T, I, E>, OrderT<T, I, E>,
+ ReductionT<T, I, E>, ScheduleT<T, I, E>,
+ TaskReductionT<T, I, E>, ToT<T, I, E>>;
template <typename T, typename I, typename E>
using UnionClausesT = std::variant<DependT<T, I, E>>;
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index f33b3b1532d3d..366cc7ef853d3 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -271,6 +271,9 @@ def OMPC_Linear : Clause<"linear"> {
def OMPC_Link : Clause<"link"> {
let flangClass = "OmpObjectList";
}
+def OMPC_LoopRange : Clause<"looprange"> {
+ let clangClass = "OMPLoopRangeClause";
+}
def OMPC_Map : Clause<"map"> {
let clangClass = "OMPMapClause";
let flangClass = "OmpMapClause";
@@ -843,6 +846,9 @@ def OMP_For : Directive<"for"> {
let category = CA_Executable;
}
def OMP_Fuse : Directive<"fuse"> {
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_LoopRange, 60>
+ ];
let association = AS_Loop;
let category = CA_Executable;
}
>From dbc440633099af24621b185036473333641bcc28 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:30:39 +0000
Subject: [PATCH 3/7] Addef fuse to documentation
---
clang/docs/OpenMPSupport.rst | 2 ++
clang/docs/ReleaseNotes.rst | 1 +
2 files changed, 3 insertions(+)
diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index d6507071d4693..5f0e363792b32 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -376,6 +376,8 @@ implementation.
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| loop stripe transformation | :good:`done` | https://github.com/llvm/llvm-project/pull/119891 |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| loop fuse transformation | :good:`done` | :none:`unclaimed` | |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| work distribute construct | :none:`unclaimed` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| task_iteration | :none:`unclaimed` | :none:`unclaimed` | |
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1f0dbe565db6b..70fa866b8e5c9 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -901,6 +901,7 @@ OpenMP Support
- Added support 'no_openmp_constructs' assumption clause.
- Added support for 'self_maps' in map and requirement clause.
- Added support for 'omp stripe' directive.
+- Added support for 'omp fuse' directive.
Improvements
^^^^^^^^^^^^
>From 00095f49f8180a9da6690954850162dd81dd0e54 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:43:41 +0000
Subject: [PATCH 4/7] Refactored preinits handling and improved coverage
---
clang/docs/OpenMPSupport.rst | 2 +-
clang/docs/ReleaseNotes.rst | 1 -
clang/include/clang/AST/StmtOpenMP.h | 5 +-
clang/include/clang/Sema/SemaOpenMP.h | 96 +-
clang/lib/AST/StmtOpenMP.cpp | 13 +
clang/lib/Basic/OpenMPKinds.cpp | 3 +-
clang/lib/CodeGen/CGExpr.cpp | 2 +
clang/lib/CodeGen/CodeGenFunction.h | 4 +
clang/lib/Sema/SemaOpenMP.cpp | 588 ++++---
clang/test/OpenMP/fuse_ast_print.cpp | 55 +
clang/test/OpenMP/fuse_codegen.cpp | 2117 +++++++++++++++----------
11 files changed, 1862 insertions(+), 1024 deletions(-)
diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index 5f0e363792b32..b39f9d3634a63 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -376,7 +376,7 @@ implementation.
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| loop stripe transformation | :good:`done` | https://github.com/llvm/llvm-project/pull/119891 |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
-| loop fuse transformation | :good:`done` | :none:`unclaimed` | |
+| loop fuse transformation | :good:`prototyped` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| work distribute construct | :none:`unclaimed` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 70fa866b8e5c9..1f0dbe565db6b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -901,7 +901,6 @@ OpenMP Support
- Added support 'no_openmp_constructs' assumption clause.
- Added support for 'self_maps' in map and requirement clause.
- Added support for 'omp stripe' directive.
-- Added support for 'omp fuse' directive.
Improvements
^^^^^^^^^^^^
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 85bde292ca748..b6a948a8c6020 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -1005,8 +1005,7 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
Stmt::StmtClass C = T->getStmtClass();
return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass ||
C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass ||
- C == OMPStripeDirectiveClass ||
- C == OMPFuseDirectiveClass;
+ C == OMPStripeDirectiveClass || C == OMPFuseDirectiveClass;
}
};
@@ -5653,6 +5652,8 @@ class OMPStripeDirective final : public OMPLoopTransformationDirective {
llvm::omp::OMPD_stripe, StartLoc, EndLoc,
NumLoops) {
setNumGeneratedLoops(2 * NumLoops);
+ // Similar to Tile, it only generates a single top level loop nest
+ setNumGeneratedLoopNests(1);
}
void setPreInits(Stmt *PreInits) {
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index f4a075e54cebe..ac4cbe3709a0d 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -1493,16 +1493,96 @@ class SemaOpenMP : public SemaBase {
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
Stmt *&Body, SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits);
- /// Analyzes and checks a loop sequence for use by a loop transformation
+ /// @brief Categories of loops encountered during semantic OpenMP loop
+ /// analysis
+ ///
+ /// This enumeration identifies the structural category of a loop or sequence
+ /// of loops analyzed in the context of OpenMP transformations and directives.
+ /// This categorization helps differentiate between original source loops
+ /// and the structures resulting from applying OpenMP loop transformations.
+ enum class OMPLoopCategory {
+
+ /// @var OMPLoopCategory::RegularLoop
+ /// Represents a standard canonical loop nest found in the
+ /// original source code or an intact loop after transformations
+ /// (i.e., the pre/post loops of a loopranged fusion).
+ RegularLoop,
+
+ /// @var OMPLoopCategory::TransformSingleLoop
+ /// Represents the resulting loop structure when an OpenMP loop
+ /// transformation generates a single, top-level loop.
+ TransformSingleLoop,
+
+ /// @var OMPLoopCategory::TransformLoopSequence
+ /// Represents the resulting loop structure when an OpenMP loop
+ /// transformation generates a sequence of two or more canonical
+ /// loop nests.
+ TransformLoopSequence
+ };
+
+ /// The main recursive process of `checkTransformableLoopSequence` that
+ /// performs grammatical parsing of a canonical loop sequence. It extracts
+ /// key information, such as the number of top-level loops, loop statements,
+ /// helper expressions, and other relevant loop-related data, all in a single
+ /// execution to avoid redundant traversals. This analysis flattens inner
+ /// loop sequences.
+ ///
+ /// \param LoopSeqStmt The AST of the original statement.
+ /// \param LoopSeqSize [out] Number of top level canonical loops.
+ /// \param NumLoops [out] Number of total canonical loops (nested too).
+ /// \param LoopHelpers [out] The multiple loop analyses results.
+ /// \param ForStmts [out] The multiple Stmt of each For loop.
+ /// \param OriginalInits [out] The raw original initialization statements
+ /// of each loop of the loop sequence
+ /// \param TransformsPreInits [out] The multiple collection of statements and
+ /// declarations that must have been executed/declared
+ /// before entering the loop (each belonging to a
+ /// particular loop transformation, nullptr otherwise)
+ /// \param LoopSequencePreInits [out] Additional general collection of loop
+ /// transformation related statements and declarations
+ /// not bound to a particular loop that must be
+ /// executed before entering the loop transformation
+ /// \param LoopCategories [out] A sequence of OMPLoopCategory values,
+ /// one for each loop or loop transformation node
+ /// successfully analyzed.
+ /// \param Context The AST context.
+ /// \param Kind The loop transformation directive kind.
+ /// \return Whether the original statement is both syntactically and
+ /// semantically correct according to OpenMP 6.0 canonical loop
+ /// sequence definition.
+ bool analyzeLoopSequence(
+ Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops,
+ SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+ SmallVectorImpl<Stmt *> &ForStmts,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+ SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context,
+ OpenMPDirectiveKind Kind);
+
+ /// Validates and checks whether a loop sequence can be transformed according
+ /// to the given directive, providing necessary setup and initialization
+ /// (Driver function) before recursion using `analyzeLoopSequence`.
///
/// \param Kind The loop transformation directive kind.
- /// \param NumLoops [out] Number of total canonical loops
- /// \param LoopSeqSize [out] Number of top level canonical loops
+ /// \param AStmt The AST of the original statement
+ /// \param LoopSeqSize [out] Number of top level canonical loops.
+ /// \param NumLoops [out] Number of total canonical loops (nested too)
/// \param LoopHelpers [out] The multiple loop analyses results.
- /// \param LoopStmts [out] The multiple Stmt of each For loop.
- /// \param OriginalInits [out] The multiple collection of statements and
+ /// \param ForStmts [out] The multiple Stmt of each For loop.
+ /// \param OriginalInits [out] The raw original initialization statements
+ /// of each belonging to a loop of the loop sequence
+ /// \param TransformsPreInits [out] The multiple collection of statements and
/// declarations that must have been executed/declared
- /// before entering the loop.
+ /// before entering the loop (each belonging to a
+ /// particular loop transformation, nullptr otherwise)
+ /// \param LoopSequencePreInits [out] Additional general collection of loop
+ /// transformation related statements and declarations
+ /// not bounded to a particular loop that must be
+ /// executed before entering the loop transformation
+ /// \param LoopCategories [out] A sequence of OMPLoopCategory values,
+ /// one for each loop or loop transformation node
+ /// successfully analyzed.
/// \param Context
/// \return Whether there was an absence of errors or not
bool checkTransformableLoopSequence(
@@ -1511,7 +1591,9 @@ class SemaOpenMP : public SemaBase {
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
SmallVectorImpl<Stmt *> &ForStmts,
SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
- ASTContext &Context);
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+ SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context);
/// Helper to keep information about the current `omp begin/end declare
/// variant` nesting.
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 6a2ac64f4e40b..da00b8eeeb2d4 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -457,6 +457,8 @@ OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc,
C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc);
Dir->setNumGeneratedLoops(NumGeneratedLoops);
// The number of generated loops and loop nests during unroll matches
+ // given that unroll only generates top level canonical loop nests
+ // so each generated loop is a top level canonical loop nest
Dir->setNumGeneratedLoopNests(NumGeneratedLoops);
Dir->setTransformedStmt(TransformedStmt);
Dir->setPreInits(PreInits);
@@ -517,6 +519,17 @@ OMPFuseDirective *OMPFuseDirective::Create(
NumLoops);
Dir->setTransformedStmt(TransformedStmt);
Dir->setPreInits(PreInits);
+ // The number of top-level canonical loop nests may
+ // differ from the total number of generated loops.
+ // Example:
+ // Before fusion:
+ // for (int i = 0; i < N; ++i)
+ // for (int j = 0; j < M; ++j)
+ // A[i][j] = i + j;
+ //
+ // for (int k = 0; k < P; ++k)
+ // B[k] = k * 2;
+ // Here, NumLoopNests = 2, but NumLoops = 3.
Dir->setNumGeneratedLoopNests(NumLoopNests);
Dir->setNumGeneratedLoops(NumLoops);
return Dir;
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index 3c62b61f3a438..12ebead63d9ba 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -704,7 +704,8 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) {
return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse ||
- DKind == OMPD_interchange || DKind == OMPD_stripe || DKind == OMPD_fuse;
+ DKind == OMPD_interchange || DKind == OMPD_stripe ||
+ DKind == OMPD_fuse;
}
bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) {
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 1a835c97decef..047c60bb07378 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3223,6 +3223,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
// No other cases for now.
} else {
+ llvm::dbgs() << "THE DAMN DECLREFEXPR HASN'T BEEN ENTERED IN LOCALDECLMAP\n";
+ VD->dumpColor();
llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?");
}
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 59cb4d9caa98d..fc98ca0b30a7f 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -5379,6 +5379,10 @@ class CodeGenFunction : public CodeGenTypeCache {
/// Set the address of a local variable.
void setAddrOfLocalVar(const VarDecl *VD, Address Addr) {
+ // NOTE(review): unlike the assert below, this diagnostic is still emitted
+ // when asserts are compiled out. It looks like a debugging aid that should
+ // be dropped before merging -- confirm.
+ if (LocalDeclMap.count(VD)) {
+ llvm::errs() << "Warning: VarDecl already exists in map: ";
+ VD->dumpColor();
+ }
assert(!LocalDeclMap.count(VD) && "Decl already exists in LocalDeclMap!");
LocalDeclMap.insert({VD, Addr});
}
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 8cd56d1af6ac8..30f8cd3087268 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -22,6 +22,7 @@
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/DynamicRecursiveASTVisitor.h"
#include "clang/AST/OpenMPClause.h"
+#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
@@ -47,6 +48,7 @@
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/Assumptions.h"
#include <optional>
+#include <queue>
using namespace clang;
using namespace llvm::omp;
@@ -14125,6 +14127,45 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective(
getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
}
+/// Recursion terminator for tryHandleAs: the list of candidate types is
+/// exhausted, so nothing matched and the callable is never invoked.
+template <typename T, typename F> static bool tryHandleAs(T *, F &&) {
+  return false;
+}
+
+/// Dispatches \p t to \p f according to its dynamic type.
+///
+/// The candidate types are tried from left to right using `dyn_cast`; the
+/// first one that matches is passed to \p f and the search stops.
+///
+/// \tparam Class The candidate type tried by this instantiation.
+/// \tparam Rest The candidate types tried if \p Class does not match.
+/// \tparam T The static type of \p t.
+/// \tparam F The type of the callable \p f.
+/// \param t The object to classify.
+/// \param f The callable invoked with \p t cast to the matching type.
+/// \return `true` if some candidate matched (and \p f ran), else `false`.
+template <typename Class, typename... Rest, typename T, typename F>
+static bool tryHandleAs(T *t, F &&f) {
+  Class *Matched = dyn_cast<Class>(t);
+  if (!Matched)
+    return tryHandleAs<Rest...>(t, std::forward<F>(f));
+  f(Matched);
+  return true;
+}
+
+/// Appends the pre-init statements of the loop transformation \p Transform
+/// to the last entry of \p PreInits.
+///
+/// The pre-inits are obtained through the concrete directive class, so
+/// \p Transform is dispatched over the loop transformation directives. The
+/// list must name every one of them, including OMPStripeDirective (which
+/// checkTransformableLoopNest handled before it was switched to this helper),
+/// because an unlisted directive ends up in llvm_unreachable.
+///
+/// \param Transform The loop transformation directive to take pre-inits from.
+/// \param PreInits The per-loop pre-init lists; must not be empty, since the
+///        statements are appended to its last element.
+static void updatePreInits(OMPLoopBasedDirective *Transform,
+                           SmallVectorImpl<SmallVector<Stmt *, 0>> &PreInits) {
+  if (!tryHandleAs<OMPTileDirective, OMPStripeDirective, OMPUnrollDirective,
+                   OMPReverseDirective, OMPInterchangeDirective,
+                   OMPFuseDirective>(Transform, [&PreInits](auto *Dir) {
+        appendFlattenedStmtList(PreInits.back(), Dir->getPreInits());
+      }))
+    llvm_unreachable("Unhandled loop transformation");
+}
+
bool SemaOpenMP::checkTransformableLoopNest(
OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
@@ -14155,121 +14196,106 @@ bool SemaOpenMP::checkTransformableLoopNest(
return false;
},
[&OriginalInits](OMPLoopBasedDirective *Transform) {
- Stmt *DependentPreInits;
- if (auto *Dir = dyn_cast<OMPTileDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPStripeDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPReverseDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPFuseDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else
- llvm_unreachable("Unhandled loop transformation");
-
- appendFlattenedStmtList(OriginalInits.back(), DependentPreInits);
+ updatePreInits(Transform, OriginalInits);
});
assert(OriginalInits.back().empty() && "No preinit after innermost loop");
OriginalInits.pop_back();
return Result;
}
-class NestedLoopCounterVisitor
- : public clang::RecursiveASTVisitor<NestedLoopCounterVisitor> {
+// Counts the total number of nested loops, including the outermost loop (the
+// original loop). PRECONDITION: this visitor must be invoked on the original
+// loop to be analyzed. The traversal stops at Decls and Exprs, given that
+// they may contain inner loops that must not be counted.
+//
+// Example AST structure for the code:
+//
+// int main() {
+// #pragma omp fuse
+// {
+// for (int i = 0; i < 100; i++) { <-- Outer loop
+// []() {
+// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP
+// };
+// for(int j = 0; j < 5; ++j) {} <-- Inner loop
+// }
+// for (int r = 0; r < 100; r++) { <-- Outer loop
+// struct LocalClass {
+// void bar() {
+// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP
+// }
+// };
+// for(int k = 0; k < 10; ++k) {} <-- Inner loop
+// ({x = 5; for(k = 0; k < 10; ++k) x += k; x;}); <-- NOT A LOOP
+// }
+// }
+// }
+// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops
+class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor {
+private:
+ unsigned NestedLoopCount = 0;
+
public:
- explicit NestedLoopCounterVisitor() : NestedLoopCount(0) {}
+ explicit NestedLoopCounterVisitor() {}
- bool VisitForStmt(clang::ForStmt *FS) {
- ++NestedLoopCount;
- return true;
+ unsigned getNestedLoopCount() const { return NestedLoopCount; }
+
+ bool VisitForStmt(ForStmt *FS) override {
+ ++NestedLoopCount;
+ return true;
}
- bool VisitCXXForRangeStmt(clang::CXXForRangeStmt *FRS) {
- ++NestedLoopCount;
- return true;
+ bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override {
+ ++NestedLoopCount;
+ return true;
}
- unsigned getNestedLoopCount() const { return NestedLoopCount; }
+ bool TraverseStmt(Stmt *S) override {
+ if (!S)
+ return true;
-private:
- unsigned NestedLoopCount;
+ // Skip traversal of all expressions, including special cases like
+ // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions
+ // may contain inner statements (and even loops), but they are not part
+ // of the syntactic body of the surrounding loop structure.
+ // Therefore must not be counted
+ if (isa<Expr>(S))
+ return true;
+
+ // Only recurse into CompoundStmt (block {}) and loop bodies
+ if (isa<CompoundStmt>(S) || isa<ForStmt>(S) ||
+ isa<CXXForRangeStmt>(S)) {
+ return DynamicRecursiveASTVisitor::TraverseStmt(S);
+ }
+
+ // Stop traversal of the rest of statements, that break perfect
+ // loop nesting, such as control flow (IfStmt, SwitchStmt...)
+ return true;
+ }
+
+ bool TraverseDecl(Decl *D) override {
+ // Stop in the case of finding a declaration, it is not important
+ // in order to find nested loops (Possible CXXRecordDecl, RecordDecl,
+ // FunctionDecl...)
+ return true;
+ }
};
-bool SemaOpenMP::checkTransformableLoopSequence(
- OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize,
- unsigned &NumLoops,
+bool SemaOpenMP::analyzeLoopSequence(
+ Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops,
SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
SmallVectorImpl<Stmt *> &ForStmts,
SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
- ASTContext &Context) {
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+ SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context,
+ OpenMPDirectiveKind Kind) {
- // Checks whether the given statement is a compound statement
VarsWithInheritedDSAType TmpDSA;
- if (!isa<CompoundStmt>(AStmt)) {
- Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
- << getOpenMPDirectiveName(Kind);
- return false;
- }
- // Callback for updating pre-inits in case there are even more
- // loop-sequence-generating-constructs inside of the main compound stmt
- auto OnTransformationCallback =
- [&OriginalInits](OMPLoopBasedDirective *Transform) {
- Stmt *DependentPreInits;
- if (auto *Dir = dyn_cast<OMPTileDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPReverseDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPInterchangeDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else if (auto *Dir = dyn_cast<OMPFuseDirective>(Transform))
- DependentPreInits = Dir->getPreInits();
- else
- llvm_unreachable("Unhandled loop transformation");
-
- appendFlattenedStmtList(OriginalInits.back(), DependentPreInits);
- };
-
- // Number of top level canonical loop nests observed (And acts as index)
- LoopSeqSize = 0;
- // Number of total observed loops
- NumLoops = 0;
-
- // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows
- // the grammar:
- //
- // canonical-loop-sequence:
- // {
- // loop-sequence+
- // }
- // where loop-sequence can be any of the following:
- // 1. canonical-loop-sequence
- // 2. loop-nest
- // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective)
- //
- // To recognise and traverse this structure the following helper functions
- // have been defined. handleLoopSequence serves as the recurisve entry point
- // and tries to match the input AST to the canonical loop sequence grammar
- // structure
-
- // Helper functions to validate canonical loop sequence grammar is valid
- auto isLoopSequenceDerivation = [](auto *Child) {
- return isa<ForStmt>(Child) || isa<CXXForRangeStmt>(Child) ||
- isa<OMPLoopTransformationDirective>(Child);
- };
- auto isLoopGeneratingStmt = [](auto *Child) {
- return isa<OMPLoopTransformationDirective>(Child);
- };
-
+ QualType BaseInductionVarType;
// Helper Lambda to handle storing initialization and body statements for both
// ForStmt and CXXForRangeStmt and checks for any possible mismatch between
// induction variables types
- QualType BaseInductionVarType;
auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType,
this, &Context](Stmt *LoopStmt) {
if (auto *For = dyn_cast<ForStmt>(LoopStmt)) {
@@ -14292,33 +14318,35 @@ bool SemaOpenMP::checkTransformableLoopSequence(
}
}
}
-
} else {
- assert(isa<CXXForRangeStmt>(LoopStmt) &&
- "Expected canonical for or range-based for loops.");
- auto *CXXFor = dyn_cast<CXXForRangeStmt>(LoopStmt);
+ auto *CXXFor = cast<CXXForRangeStmt>(LoopStmt);
OriginalInits.back().push_back(CXXFor->getBeginStmt());
ForStmts.push_back(CXXFor);
}
};
+
// Helper lambda functions to encapsulate the processing of different
// derivations of the canonical loop sequence grammar
//
// Modularized code for handling loop generation and transformations
- auto handleLoopGeneration = [&storeLoopStatements, &LoopHelpers,
- &OriginalInits, &LoopSeqSize, &NumLoops, Kind,
- &TmpDSA, &OnTransformationCallback,
- this](Stmt *Child) {
+ auto analyzeLoopGeneration = [&storeLoopStatements, &LoopHelpers,
+ &OriginalInits, &TransformsPreInits,
+ &LoopCategories, &LoopSeqSize, &NumLoops, Kind,
+ &TmpDSA, &ForStmts, &Context,
+ &LoopSequencePreInits, this](Stmt *Child) {
auto LoopTransform = dyn_cast<OMPLoopTransformationDirective>(Child);
Stmt *TransformedStmt = LoopTransform->getTransformedStmt();
unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests();
-
+ unsigned NumGeneratedLoops = LoopTransform->getNumGeneratedLoops();
// Handle the case where transformed statement is not available due to
// dependent contexts
if (!TransformedStmt) {
- if (NumGeneratedLoopNests > 0)
+ if (NumGeneratedLoopNests > 0) {
+ LoopSeqSize += NumGeneratedLoopNests;
+ NumLoops += NumGeneratedLoops;
return true;
- // Unroll full
+ }
+ // Unroll full (0 loops produced)
else {
Diag(Child->getBeginLoc(), diag::err_omp_not_for)
<< 0 << getOpenMPDirectiveName(Kind);
@@ -14331,38 +14359,56 @@ bool SemaOpenMP::checkTransformableLoopSequence(
Diag(Child->getBeginLoc(), diag::err_omp_not_for)
<< 0 << getOpenMPDirectiveName(Kind);
return false;
- // Future loop transformations that generate multiple canonical loops
- } else if (NumGeneratedLoopNests > 1) {
- llvm_unreachable("Multiple canonical loop generating transformations "
- "like loop splitting are not yet supported");
}
+ // Loop transformations such as split or loopranged fuse
+ else if (NumGeneratedLoopNests > 1) {
+ // Get the preinits related to this loop sequence generating
+ // loop transformation (i.e loopranged fuse, split...)
+ LoopSequencePreInits.emplace_back();
+ // These preinits differ slightly from regular inits/pre-inits related
+ // to single loop generating loop transformations (interchange, unroll)
+ // given that they are not bounded to a particular loop nest
+ // so they need to be treated independently
+ updatePreInits(LoopTransform, LoopSequencePreInits);
+ return analyzeLoopSequence(TransformedStmt, LoopSeqSize, NumLoops,
+ LoopHelpers, ForStmts, OriginalInits,
+ TransformsPreInits, LoopSequencePreInits,
+ LoopCategories, Context, Kind);
+ }
+ // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all)
+ else {
+ // Process the transformed loop statement
+ OriginalInits.emplace_back();
+ TransformsPreInits.emplace_back();
+ LoopHelpers.emplace_back();
+ LoopCategories.push_back(OMPLoopCategory::TransformSingleLoop);
+
+ unsigned IsCanonical =
+ checkOpenMPLoop(Kind, nullptr, nullptr, TransformedStmt, SemaRef,
+ *DSAStack, TmpDSA, LoopHelpers[LoopSeqSize]);
+
+ if (!IsCanonical) {
+ Diag(TransformedStmt->getBeginLoc(), diag::err_omp_not_canonical_loop)
+ << getOpenMPDirectiveName(Kind);
+ return false;
+ }
+ storeLoopStatements(TransformedStmt);
+ updatePreInits(LoopTransform, TransformsPreInits);
- // Process the transformed loop statement
- Child = TransformedStmt;
- OriginalInits.emplace_back();
- LoopHelpers.emplace_back();
- OnTransformationCallback(LoopTransform);
-
- unsigned IsCanonical =
- checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack,
- TmpDSA, LoopHelpers[LoopSeqSize]);
-
- if (!IsCanonical) {
- Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop)
- << getOpenMPDirectiveName(Kind);
- return false;
+ NumLoops += NumGeneratedLoops;
+ ++LoopSeqSize;
+ return true;
}
- storeLoopStatements(TransformedStmt);
- NumLoops += LoopTransform->getNumGeneratedLoops();
- return true;
};
// Modularized code for handling regular canonical loops
- auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits,
- &LoopSeqSize, &NumLoops, Kind, &TmpDSA,
- this](Stmt *Child) {
+ auto analyzeRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits,
+ &LoopSeqSize, &NumLoops, Kind, &TmpDSA,
+ &LoopCategories, this](Stmt *Child) {
OriginalInits.emplace_back();
LoopHelpers.emplace_back();
+ LoopCategories.push_back(OMPLoopCategory::RegularLoop);
+
unsigned IsCanonical =
checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack,
TmpDSA, LoopHelpers[LoopSeqSize]);
@@ -14380,57 +14426,114 @@ bool SemaOpenMP::checkTransformableLoopSequence(
return true;
};
- // Helper function to process a Loop Sequence Recursively
- auto handleLoopSequence = [&](Stmt *LoopSeqStmt,
- auto &handleLoopSequenceCallback) -> bool {
- for (auto *Child : LoopSeqStmt->children()) {
- if (!Child)
- continue;
+ // Helper functions to validate canonical loop sequence grammar is valid
+ auto isLoopSequenceDerivation = [](auto *Child) {
+ return isa<ForStmt>(Child) || isa<CXXForRangeStmt>(Child) ||
+ isa<OMPLoopTransformationDirective>(Child);
+ };
+ auto isLoopGeneratingStmt = [](auto *Child) {
+ return isa<OMPLoopTransformationDirective>(Child);
+ };
+
- // Skip over non-loop-sequence statements
- if (!isLoopSequenceDerivation(Child)) {
- Child = Child->IgnoreContainers();
+ // High level grammar validation
+ for (auto *Child : LoopSeqStmt->children()) {
- // Ignore empty compound statement
if (!Child)
- continue;
+ continue;
- // In the case of a nested loop sequence ignoring containers would not
- // be enough, a recurisve transversal of the loop sequence is required
- if (isa<CompoundStmt>(Child)) {
- if (!handleLoopSequenceCallback(Child, handleLoopSequenceCallback))
- return false;
- // Already been treated, skip this children
- continue;
+ // Skip over non-loop-sequence statements
+ if (!isLoopSequenceDerivation(Child)) {
+ Child = Child->IgnoreContainers();
+
+ // Ignore empty compound statement
+ if (!Child)
+ continue;
+
+ // In the case of a nested loop sequence, ignoring containers would not
+ // be enough; a recursive traversal of the loop sequence is required
+ if (isa<CompoundStmt>(Child)) {
+ if (!analyzeLoopSequence(Child, LoopSeqSize, NumLoops, LoopHelpers,
+ ForStmts, OriginalInits, TransformsPreInits,
+ LoopSequencePreInits, LoopCategories, Context,
+ Kind))
+ return false;
+ // Already been treated, skip this children
+ continue;
+ }
+ }
+ // Regular loop sequence handling
+ if (isLoopSequenceDerivation(Child)) {
+ if (isLoopGeneratingStmt(Child)) {
+ if (!analyzeLoopGeneration(Child)) {
+ return false;
}
+ // analyzeLoopGeneration updates Loop Sequence size accordingly
+
+ } else {
+ if (!analyzeRegularLoop(Child)) {
+ return false;
+ }
+ // Update the Loop Sequence size by one
+ ++LoopSeqSize;
}
- // Regular loop sequence handling
- if (isLoopSequenceDerivation(Child)) {
- if (isLoopGeneratingStmt(Child)) {
- if (!handleLoopGeneration(Child)) {
- return false;
- }
} else {
- if (!handleRegularLoop(Child)) {
- return false;
- }
+ // Report error for invalid statement inside canonical loop sequence
+ Diag(Child->getBeginLoc(), diag::err_omp_not_for)
+ << 0 << getOpenMPDirectiveName(Kind);
+ return false;
}
- ++LoopSeqSize;
- } else {
- // Report error for invalid statement inside canonical loop sequence
- Diag(Child->getBeginLoc(), diag::err_omp_not_for)
- << 0 << getOpenMPDirectiveName(Kind);
+ }
+ return true;
+}
+
+bool SemaOpenMP::checkTransformableLoopSequence(
+ OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize,
+ unsigned &NumLoops,
+ SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+ SmallVectorImpl<Stmt *> &ForStmts,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &OriginalInits,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &TransformsPreInits,
+ SmallVectorImpl<SmallVector<Stmt *, 0>> &LoopSequencePreInits,
+ SmallVectorImpl<OMPLoopCategory> &LoopCategories, ASTContext &Context) {
+
+ // Checks whether the given statement is a compound statement
+ if (!isa<CompoundStmt>(AStmt)) {
+ Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
+ << getOpenMPDirectiveName(Kind);
return false;
- }
- }
- return true;
- };
+ }
+ // Number of top level canonical loop nests observed (And acts as index)
+ LoopSeqSize = 0;
+ // Number of total observed loops
+ NumLoops = 0;
+
+ // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows
+ // the grammar:
+ //
+ // canonical-loop-sequence:
+ // {
+ // loop-sequence+
+ // }
+ // where loop-sequence can be any of the following:
+ // 1. canonical-loop-sequence
+ // 2. loop-nest
+ // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective)
+ //
+ // To recognise and traverse this structure the following helper functions
+ // have been defined. analyzeLoopSequence serves as the recursive entry point
+ // and tries to match the input AST to the canonical loop sequence grammar
+ // structure. This function will perform both a semantic and syntactical
+ // analysis of the given statement according to OpenMP 6.0 definition of
+ // the aforementioned canonical loop sequence
// Recursive entry point to process the main loop sequence
- if (!handleLoopSequence(AStmt, handleLoopSequence)) {
- return false;
+ if (!analyzeLoopSequence(AStmt, LoopSeqSize, NumLoops, LoopHelpers, ForStmts,
+ OriginalInits, TransformsPreInits,
+ LoopSequencePreInits, LoopCategories, Context,
+ Kind)) {
+ return false;
}
-
if (LoopSeqSize <= 0) {
Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence)
<< getOpenMPDirectiveName(Kind);
@@ -14462,9 +14565,7 @@ static void addLoopPreInits(ASTContext &Context,
RangeEnd->getBeginLoc(),
RangeEnd->getEndLoc()));
}
-
llvm::append_range(PreInits, OriginalInit);
-
// List of OMPCapturedExprDecl, for __begin, __end, and NumIterations
if (auto *PI = cast_or_null<DeclStmt>(LoopHelper.PreInits)) {
PreInits.push_back(new (Context) DeclStmt(
@@ -15132,7 +15233,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
Stmt *LoopStmt = nullptr;
collectLoopStmts(AStmt, {LoopStmt});
- // Determine the PreInit declarations.
+ // Determine the PreInit declarations.
SmallVector<Stmt *, 4> PreInits;
addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits);
@@ -15698,28 +15799,35 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
if (!AStmt) {
return StmtError();
}
+
+ unsigned NumLoops = 1;
+ unsigned LoopSeqSize = 1;
+
+ // Defer transformation in dependent contexts
+ // The NumLoopNests argument is set to a placeholder 1 (even though
+ // using looprange fuse could yield up to 3 top level loop nests)
+ // because a dependent context could prevent determining its true value
+ if (CurrContext->isDependentContext()) {
+ return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
+ NumLoops, LoopSeqSize, AStmt, nullptr,
+ nullptr);
+ }
+
// Validate that the potential loop sequence is transformable for fusion
// Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops
SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers;
SmallVector<Stmt *> LoopStmts;
SmallVector<SmallVector<Stmt *, 0>> OriginalInits;
-
- unsigned NumLoops;
- unsigned LoopSeqSize;
+ SmallVector<SmallVector<Stmt *, 0>> TransformsPreInits;
+ SmallVector<SmallVector<Stmt *, 0>> LoopSequencePreInits;
+ SmallVector<OMPLoopCategory, 0> LoopCategories;
if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops,
LoopHelpers, LoopStmts, OriginalInits,
- Context)) {
+ TransformsPreInits, LoopSequencePreInits,
+ LoopCategories, Context)) {
return StmtError();
}
- // Defer transformation in dependent contexts
- // The NumLoopNests argument is set to a placeholder (0)
- // because a dependent context could prevent determining its true value
- if (CurrContext->isDependentContext()) {
- return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
- NumLoops, 0, AStmt, nullptr, nullptr);
- }
-
// Handle clauses, which can be any of the following: [looprange, apply]
const OMPLoopRangeClause *LRC =
OMPExecutableDirective::getSingleClause<OMPLoopRangeClause>(Clauses);
@@ -15781,11 +15889,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
"Expecting loop iteration space dimensionality to match number of "
"affected loops");
- // PreInits hold a sequence of variable declarations that must be executed
- // before the fused loop begins. These include bounds, strides, and other
- // helper variables required for the transformation.
- SmallVector<Stmt *> PreInits;
-
// Select the type with the largest bit width among all induction variables
QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType();
for (unsigned int I = FirstVal; I < LastVal; ++I) {
@@ -15797,7 +15900,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
uint64_t IVBitWidth = Context.getIntWidth(IVType);
// Create pre-init declarations for all loops lower bounds, upper bounds,
- // strides and num-iterations
+ // strides and num-iterations for every top level loop in the fusion
SmallVector<VarDecl *, 4> LBVarDecls;
SmallVector<VarDecl *, 4> STVarDecls;
SmallVector<VarDecl *, 4> NIVarDecls;
@@ -15835,12 +15938,62 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
return std::make_pair(VD, DeclStmt);
};
+ // PreInits hold a sequence of variable declarations that must be executed
+ // before the fused loop begins. These include bounds, strides, and other
+ // helper variables required for the transformation. Other loop transforms
+ // also contain their own preinits
+ SmallVector<Stmt *> PreInits;
+ // Iterator to keep track of loop transformations
+ unsigned int TransformIndex = 0;
+
+ // Update the general preinits using the preinits generated by loop sequence
+ // generating loop transformations. These preinits differ slightly from
+ // single-loop transformation preinits, as they can be detached from a
+ // specific loop inside the multiple generated loop nests. This happens
+ // because certain helper variables, like '.omp.fuse.max', are introduced to
+ // handle fused iteration spaces and may not be directly tied to a single
+ // original loop. The preinit structure must ensure that hidden variables
+ // like '.omp.fuse.max' are still properly handled.
+ // Transformations that apply this concept: Loopranged Fuse, Split
+ if (!LoopSequencePreInits.empty()) {
+ for (const auto &LTPreInits : LoopSequencePreInits) {
+ if (!LTPreInits.empty()) {
+ llvm::append_range(PreInits, LTPreInits);
+ }
+ }
+ }
+
// Process each single loop to generate and collect declarations
- // and statements for all helper expressions
+ // and statements for all helper expressions related to
+ // particular single loop nests
+
+ // Also In the case of the fused loops, we keep track of their original
+ // inits by appending them to their preinits statement, and in the case of
+ // transformations, also append their preinits (which contain the original
+ // loop initialization statement or other statements)
+
+ // Firstly we need to update TransformIndex to match the beginning of the
+ // looprange section
+ for (unsigned int I = 0; I < FirstVal - 1; ++I) {
+ if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop)
+ ++TransformIndex;
+ }
for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) {
- addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
- PreInits);
+ if (LoopCategories[I] == OMPLoopCategory::RegularLoop) {
+ addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
+ PreInits);
+ } else if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) {
+ // For transformed loops, insert both pre-inits and original inits.
+ // Order matters: pre-inits may define variables used in the original
+ // inits such as upper bounds...
+ auto TransformPreInit = TransformsPreInits[TransformIndex++];
+ if (!TransformPreInit.empty()) {
+ llvm::append_range(PreInits, TransformPreInit);
+ }
+ addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I],
+ PreInits);
+ }
auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", J);
auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", J);
auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", J);
@@ -15859,7 +16012,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
NIVarDecls.push_back(NIVD);
IVVarDecls.push_back(IVVD);
- PreInits.push_back(UBDStmt.get());
PreInits.push_back(LBDStmt.get());
PreInits.push_back(STDStmt.get());
PreInits.push_back(NIDStmt.get());
@@ -16035,6 +16187,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
BodyStmts.push_back(IdxExpr.get());
llvm::append_range(BodyStmts, LoopHelpers[I].Updates);
+ // If the loop is a CXXForRangeStmt then the iterator variable is needed
if (auto *SourceCXXFor = dyn_cast<CXXForRangeStmt>(LoopStmts[I]))
BodyStmts.push_back(SourceCXXFor->getLoopVarStmt());
@@ -16069,21 +16222,50 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(),
IncrExpr.get()->getEndLoc());
- // In the case of looprange, the result of fuse won't simply
- // be a single loop (ForStmt), but rather a loop sequence
- // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop
- // and the post-fusion loops, preserving its original order.
+ // In the case of looprange, the result of fuse won't simply
+ // be a single loop (ForStmt), but rather a loop sequence
+ // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop
+ // and the post-fusion loops, preserving its original order.
+ //
+ // Note: If looprange clause produces a single fused loop nest then
+ // this compound statement wrapper is unnecessary (Therefore this
+ // treatment is skipped)
+
Stmt *FusionStmt = FusedForStmt;
- if (LRC) {
+ if (LRC && CountVal != LoopSeqSize) {
SmallVector<Stmt *, 4> FinalLoops;
- // Gather all the pre-fusion loops
- for (unsigned I = 0; I < FirstVal - 1; ++I)
- FinalLoops.push_back(LoopStmts[I]);
- // Gather the fused loop
- FinalLoops.push_back(FusedForStmt);
- // Gather all the post-fusion loops
- for (unsigned I = FirstVal + CountVal - 1; I < LoopSeqSize; ++I)
+ // Reset the transform index
+ TransformIndex = 0;
+
+ // Collect all non-fused loops before and after the fused region.
+ // Pre-fusion and post-fusion loops are inserted in order exploiting their
+ // symmetry, along with their corresponding transformation pre-inits if
+ // needed. The fused loop is added between the two regions.
+ for (unsigned I = 0; I < LoopSeqSize; ++I) {
+ if (I >= FirstVal - 1 && I < FirstVal + CountVal - 1) {
+ // Update the Transformation counter to skip already treated
+ // loop transformations
+ if (LoopCategories[I] != OMPLoopCategory::TransformSingleLoop)
+ ++TransformIndex;
+ continue;
+ }
+
+ // No need to handle:
+ // Regular loops: they are kept intact as-is.
+ // Loop-sequence-generating transformations: already handled earlier.
+ // Only TransformSingleLoop requires inserting pre-inits here
+
+ if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) {
+ auto TransformPreInit = TransformsPreInits[TransformIndex++];
+ if (!TransformPreInit.empty()) {
+ llvm::append_range(PreInits, TransformPreInit);
+ }
+ }
+
FinalLoops.push_back(LoopStmts[I]);
+ }
+
+ FinalLoops.insert(FinalLoops.begin() + (FirstVal - 1), FusedForStmt);
FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(),
SourceLocation(), SourceLocation());
}
diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp
index ac4f0d38a9c68..9d85bd1172948 100644
--- a/clang/test/OpenMP/fuse_ast_print.cpp
+++ b/clang/test/OpenMP/fuse_ast_print.cpp
@@ -338,6 +338,61 @@ void tfoo9() {
foo9<1, 2>();
}
+// PRINT-LABEL: void foo10(
+// DUMP-LABEL: FunctionDecl {{.*}} foo10
+void foo10() {
+ // PRINT: #pragma omp fuse looprange(2,2)
+ // DUMP: OMPFuseDirective
+ // DUMP: OMPLooprangeClause
+ #pragma omp fuse looprange(2,2)
+ // PRINT: {
+ // DUMP: CompoundStmt
+ {
+ // PRINT: for (int i = 0; i < 10; i += 2)
+ // DUMP: ForStmt
+ for (int i = 0; i < 10; i += 2)
+ // PRINT: body(i)
+ // DUMP: CallExpr
+ body(i);
+ // PRINT: for (int ii = 0; ii < 10; ii += 2)
+ // DUMP: ForStmt
+ for (int ii = 0; ii < 10; ii += 2)
+ // PRINT: body(ii)
+ // DUMP: CallExpr
+ body(ii);
+ // PRINT: #pragma omp fuse looprange(2,2)
+ // DUMP: OMPFuseDirective
+ // DUMP: OMPLooprangeClause
+ #pragma omp fuse looprange(2,2)
+ {
+ // PRINT: for (int j = 10; j > 0; --j)
+ // DUMP: ForStmt
+ for (int j = 10; j > 0; --j)
+ // PRINT: body(j)
+ // DUMP: CallExpr
+ body(j);
+ // PRINT: for (int jj = 10; jj > 0; --jj)
+ // DUMP: ForStmt
+ for (int jj = 10; jj > 0; --jj)
+ // PRINT: body(jj)
+ // DUMP: CallExpr
+ body(jj);
+ // PRINT: for (int k = 0; k <= 10; ++k)
+ // DUMP: ForStmt
+ for (int k = 0; k <= 10; ++k)
+ // PRINT: body(k)
+ // DUMP: CallExpr
+ body(k);
+ // PRINT: for (int kk = 0; kk <= 10; ++kk)
+ // DUMP: ForStmt
+ for (int kk = 0; kk <= 10; ++kk)
+ // PRINT: body(kk)
+ // DUMP: CallExpr
+ body(kk);
+ }
+ }
+
+}
diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp
index d9500bed3ce31..742c280ed0172 100644
--- a/clang/test/OpenMP/fuse_codegen.cpp
+++ b/clang/test/OpenMP/fuse_codegen.cpp
@@ -65,6 +65,23 @@ extern "C" void foo4() {
}
}
+// This exemplifies the usage of loop transformations that generate
+// more than one top level canonical loop nest (e.g. split, loopranged fuse...)
+extern "C" void foo5() {
+ double arr[256];
+ #pragma omp fuse looprange(2,2)
+ {
+ #pragma omp fuse looprange(2,2)
+ {
+ for(int i = 0; i < 128; ++i) body(i);
+ for(int j = 0; j < 256; j+=2) body(j);
+ for(int k = 0; k < 512; ++k) body(k);
+ }
+ for(int c = 42; auto &&v: arr) body(c,v);
+ for(int cc = 37; auto &&vv: arr) body(cc, vv);
+ }
+}
+
#endif
// CHECK1-LABEL: define dso_local void @body(
@@ -88,7 +105,6 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
@@ -97,7 +113,6 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -129,107 +144,103 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1
// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]]
// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
// CHECK1: [[COND_TRUE]]:
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
// CHECK1-NEXT: br label %[[COND_END:.*]]
// CHECK1: [[COND_FALSE]]:
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
// CHECK1-NEXT: br label %[[COND_END]]
// CHECK1: [[COND_END]]:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK1-NEXT: br label %[[FOR_COND:.*]]
// CHECK1: [[FOR_COND]]:
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
-// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]]
// CHECK1-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
// CHECK1: [[FOR_BODY]]:
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
// CHECK1-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
// CHECK1: [[IF_THEN]]:
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]]
-// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]]
+// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]]
// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
-// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]]
// CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP35]])
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP33]])
// CHECK1-NEXT: br label %[[IF_END]]
// CHECK1: [[IF_END]]:
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]]
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]]
// CHECK1-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
// CHECK1: [[IF_THEN22]]:
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]]
-// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]]
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]]
+// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]]
// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]]
-// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]]
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]]
+// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]]
// CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP42]])
// CHECK1-NEXT: br label %[[IF_END27]]
// CHECK1: [[IF_END27]]:
// CHECK1-NEXT: br label %[[FOR_INC:.*]]
// CHECK1: [[FOR_INC]]:
-// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK1: [[FOR_END]]:
@@ -256,7 +267,6 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
@@ -265,7 +275,6 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -274,7 +283,6 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4
@@ -304,172 +312,166 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1
// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]]
// CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]]
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]]
// CHECK1-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1
// CHECK1-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK1-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1
// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]]
// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
// CHECK1: [[COND_TRUE]]:
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
// CHECK1-NEXT: br label %[[COND_END:.*]]
// CHECK1: [[COND_FALSE]]:
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
// CHECK1-NEXT: br label %[[COND_END]]
// CHECK1: [[COND_END]]:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
-// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]]
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]]
// CHECK1-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
// CHECK1: [[COND_TRUE30]]:
-// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
// CHECK1-NEXT: br label %[[COND_END32:.*]]
// CHECK1: [[COND_FALSE31]]:
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
// CHECK1-NEXT: br label %[[COND_END32]]
// CHECK1: [[COND_END32]]:
-// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ]
+// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ]
// CHECK1-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK1-NEXT: br label %[[FOR_COND:.*]]
// CHECK1: [[FOR_COND]]:
-// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
-// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]]
// CHECK1-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
// CHECK1: [[FOR_BODY]]:
-// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]]
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]]
// CHECK1-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
// CHECK1: [[IF_THEN]]:
-// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]]
-// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]]
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]]
+// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]]
// CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
-// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]]
-// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]]
+// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]]
// CHECK1-NEXT: store i32 [[ADD38]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP49]])
// CHECK1-NEXT: br label %[[IF_END]]
// CHECK1: [[IF_END]]:
-// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]]
// CHECK1-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
// CHECK1: [[IF_THEN40]]:
-// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]]
-// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]]
+// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]]
// CHECK1-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]]
-// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]]
+// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]]
// CHECK1-NEXT: store i32 [[SUB44]], ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP61]])
+// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP58]])
// CHECK1-NEXT: br label %[[IF_END45]]
// CHECK1: [[IF_END45]]:
-// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
-// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]]
+// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]]
// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
// CHECK1: [[IF_THEN47]]:
-// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
-// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
-// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]]
-// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]]
+// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]]
+// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]]
// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
-// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
-// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]]
-// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]]
+// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]]
// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
-// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP70]])
+// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP67]])
// CHECK1-NEXT: br label %[[IF_END52]]
// CHECK1: [[IF_END52]]:
// CHECK1-NEXT: br label %[[FOR_INC:.*]]
// CHECK1: [[FOR_INC]]:
-// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1
+// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK1: [[FOR_END]]:
@@ -481,13 +483,11 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -497,48 +497,43 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[__END224:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END222:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN225:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4
@@ -565,225 +560,219 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
-// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8
+// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
// CHECK1-NEXT: store i32 42, ptr [[C]], align 4
// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK1-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8
-// CHECK1-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
-// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64
+// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
-// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
-// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1
-// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1
-// CHECK1-NEXT: [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1
-// CHECK1-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8
-// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8
-// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1
-// CHECK1-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8
+// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4
-// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK1-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
-// CHECK1-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256
-// CHECK1-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK1-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8
-// CHECK1-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK1-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64
-// CHECK1-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64
-// CHECK1-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]]
-// CHECK1-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8
-// CHECK1-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1
-// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1
-// CHECK1-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1
-// CHECK1-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1
-// CHECK1-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8
+// CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64
+// CHECK1-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK1-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]]
+// CHECK1-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8
+// CHECK1-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1
+// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1
+// CHECK1-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1
+// CHECK1-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1
+// CHECK1-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8
// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8
// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK1-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1
-// CHECK1-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
-// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK1-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]]
-// CHECK1-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]]
-// CHECK1: [[COND_TRUE44]]:
-// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK1-NEXT: br label %[[COND_END46:.*]]
-// CHECK1: [[COND_FALSE45]]:
-// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK1-NEXT: br label %[[COND_END46]]
-// CHECK1: [[COND_END46]]:
-// CHECK1-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ]
-// CHECK1-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK1-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]]
-// CHECK1-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]]
-// CHECK1: [[COND_TRUE50]]:
-// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK1-NEXT: br label %[[COND_END52:.*]]
-// CHECK1: [[COND_FALSE51]]:
-// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK1-NEXT: br label %[[COND_END52]]
-// CHECK1: [[COND_END52]]:
-// CHECK1-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ]
-// CHECK1-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8
-// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1
+// CHECK1-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]]
+// CHECK1-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]]
+// CHECK1: [[COND_TRUE42]]:
+// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK1-NEXT: br label %[[COND_END44:.*]]
+// CHECK1: [[COND_FALSE43]]:
+// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: br label %[[COND_END44]]
+// CHECK1: [[COND_END44]]:
+// CHECK1-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ]
+// CHECK1-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]]
+// CHECK1: [[COND_TRUE48]]:
+// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK1-NEXT: br label %[[COND_END50:.*]]
+// CHECK1: [[COND_FALSE49]]:
+// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: br label %[[COND_END50]]
+// CHECK1: [[COND_END50]]:
+// CHECK1-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ]
+// CHECK1-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8
// CHECK1-NEXT: br label %[[FOR_COND:.*]]
// CHECK1: [[FOR_COND]]:
-// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8
-// CHECK1-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]]
-// CHECK1-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK1-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
// CHECK1: [[FOR_BODY]]:
-// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
-// CHECK1-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]]
-// CHECK1-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]]
+// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]]
+// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]]
// CHECK1: [[IF_THEN]]:
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4
-// CHECK1-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4
-// CHECK1-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64
-// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]]
-// CHECK1-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]]
-// CHECK1-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32
-// CHECK1-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4
-// CHECK1-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1
-// CHECK1-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]]
-// CHECK1-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK1-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]]
-// CHECK1-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]]
-// CHECK1: [[IF_THEN64]]:
-// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]]
-// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]]
-// CHECK1-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK1-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1
-// CHECK1-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]]
-// CHECK1-NEXT: store i32 [[ADD68]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP48]])
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64
+// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]]
+// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]]
+// CHECK1-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32
+// CHECK1-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1
+// CHECK1-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]]
+// CHECK1-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN62]]:
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]]
+// CHECK1-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1
+// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]]
+// CHECK1-NEXT: store i32 [[ADD66]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP45]])
// CHECK1-NEXT: br label %[[IF_END]]
// CHECK1: [[IF_END]]:
-// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK1-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]]
-// CHECK1-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]]
-// CHECK1: [[IF_THEN70]]:
-// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]]
-// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]]
-// CHECK1-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK1-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2
-// CHECK1-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]]
-// CHECK1-NEXT: store i32 [[ADD74]], ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP55]])
-// CHECK1-NEXT: br label %[[IF_END75]]
-// CHECK1: [[IF_END75]]:
-// CHECK1-NEXT: br label %[[IF_END76]]
-// CHECK1: [[IF_END76]]:
-// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK1-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]]
-// CHECK1-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]]
-// CHECK1: [[IF_THEN78]]:
-// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8
-// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8
-// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]]
-// CHECK1-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]]
-// CHECK1-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8
-// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8
-// CHECK1-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1
-// CHECK1-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]]
-// CHECK1-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8
-// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
-// CHECK1-NEXT: store ptr [[TMP63]], ptr [[V]], align 8
-// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4
-// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8
-// CHECK1-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP64]], double noundef [[TMP66]])
-// CHECK1-NEXT: br label %[[IF_END83]]
-// CHECK1: [[IF_END83]]:
-// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK1-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]]
-// CHECK1-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]]
-// CHECK1: [[IF_THEN85]]:
-// CHECK1-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
-// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
-// CHECK1-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]]
-// CHECK1-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]]
-// CHECK1-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8
-// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK1-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
-// CHECK1-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1
-// CHECK1-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]]
-// CHECK1-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8
-// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8
-// CHECK1-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8
-// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4
-// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8
-// CHECK1-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8
-// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP75]], double noundef [[TMP77]])
-// CHECK1-NEXT: br label %[[IF_END90]]
-// CHECK1: [[IF_END90]]:
+// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
+// CHECK1-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]]
+// CHECK1: [[IF_THEN68]]:
+// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]]
+// CHECK1-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]]
+// CHECK1-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2
+// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]]
+// CHECK1-NEXT: store i32 [[ADD72]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK1-NEXT: br label %[[IF_END73]]
+// CHECK1: [[IF_END73]]:
+// CHECK1-NEXT: br label %[[IF_END74]]
+// CHECK1: [[IF_END74]]:
+// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]]
+// CHECK1: [[IF_THEN76]]:
+// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]]
+// CHECK1-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1
+// CHECK1-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]]
+// CHECK1-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: store ptr [[TMP60]], ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]])
+// CHECK1-NEXT: br label %[[IF_END81]]
+// CHECK1: [[IF_END81]]:
+// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]]
+// CHECK1-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]]
+// CHECK1: [[IF_THEN83]]:
+// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]]
+// CHECK1-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]]
+// CHECK1-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1
+// CHECK1-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]]
+// CHECK1-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]])
+// CHECK1-NEXT: br label %[[IF_END88]]
+// CHECK1: [[IF_END88]]:
// CHECK1-NEXT: br label %[[FOR_INC:.*]]
// CHECK1: [[FOR_INC]]:
-// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1
-// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK1-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1
+// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8
// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
// CHECK1: [[FOR_END]]:
// CHECK1-NEXT: ret void
@@ -794,13 +783,11 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[ENTRY:.*:]]
// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -815,12 +802,10 @@ extern "C" void foo4() {
// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
// CHECK1-NEXT: store i32 0, ptr [[K]], align 4
-// CHECK1-NEXT: store i32 63, ptr [[DOTOMP_UB1]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
// CHECK1-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4
@@ -940,6 +925,277 @@ extern "C" void foo4() {
// CHECK1-NEXT: ret void
//
//
+// CHECK1-LABEL: define dso_local void @foo5(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END267:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
+// CHECK1-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]]
+// CHECK1: [[COND_TRUE24]]:
+// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT: br label %[[COND_END26:.*]]
+// CHECK1: [[COND_FALSE25]]:
+// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: br label %[[COND_END26]]
+// CHECK1: [[COND_END26]]:
+// CHECK1-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ]
+// CHECK1-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128
+// CHECK1-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP21]])
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND30:.*]]
+// CHECK1: [[FOR_COND30]]:
+// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK1-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]]
+// CHECK1-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]]
+// CHECK1: [[FOR_BODY32]]:
+// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64
+// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]]
+// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]]
+// CHECK1-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
+// CHECK1-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1
+// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]]
+// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN41]]:
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]]
+// CHECK1-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2
+// CHECK1-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]]
+// CHECK1-NEXT: store i32 [[ADD45]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP37]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]]
+// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK1: [[IF_THEN47]]:
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]]
+// CHECK1-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]]
+// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1
+// CHECK1-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]]
+// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK1-NEXT: br label %[[IF_END52]]
+// CHECK1: [[IF_END52]]:
+// CHECK1-NEXT: br label %[[IF_END53]]
+// CHECK1: [[IF_END53]]:
+// CHECK1-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]]
+// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]]
+// CHECK1: [[IF_THEN55]]:
+// CHECK1-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]]
+// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]]
+// CHECK1-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1
+// CHECK1-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]]
+// CHECK1-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: store ptr [[TMP52]], ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]])
+// CHECK1-NEXT: br label %[[IF_END60]]
+// CHECK1: [[IF_END60]]:
+// CHECK1-NEXT: br label %[[FOR_INC61:.*]]
+// CHECK1: [[FOR_INC61]]:
+// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1
+// CHECK1-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK1: [[FOR_END63]]:
+// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8
+// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND70:.*]]
+// CHECK1: [[FOR_COND70]]:
+// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8
+// CHECK1-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]]
+// CHECK1-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]]
+// CHECK1: [[FOR_BODY72]]:
+// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]])
+// CHECK1-NEXT: br label %[[FOR_INC73:.*]]
+// CHECK1: [[FOR_INC73]]:
+// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1
+// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND70]]
+// CHECK1: [[FOR_END74]]:
+// CHECK1-NEXT: ret void
+//
+//
// CHECK2-LABEL: define dso_local void @body(
// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: [[ENTRY:.*:]]
@@ -961,7 +1217,6 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
@@ -970,7 +1225,6 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -1002,107 +1256,103 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1
// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]]
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]]
// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
// CHECK2: [[COND_TRUE]]:
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
// CHECK2-NEXT: br label %[[COND_END:.*]]
// CHECK2: [[COND_FALSE]]:
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
// CHECK2-NEXT: br label %[[COND_END]]
// CHECK2: [[COND_END]]:
-// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ]
// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK2-NEXT: br label %[[FOR_COND:.*]]
// CHECK2: [[FOR_COND]]:
-// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
-// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]]
// CHECK2-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
// CHECK2: [[FOR_BODY]]:
-// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
// CHECK2-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
// CHECK2: [[IF_THEN]]:
-// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]]
-// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]]
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]]
+// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]]
// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
-// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]]
-// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]]
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]]
// CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP35]])
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP33]])
// CHECK2-NEXT: br label %[[IF_END]]
// CHECK2: [[IF_END]]:
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]]
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]]
// CHECK2-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
// CHECK2: [[IF_THEN22]]:
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]]
-// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]]
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]]
+// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]]
// CHECK2-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]]
-// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]]
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]]
+// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]]
// CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP42]])
// CHECK2-NEXT: br label %[[IF_END27]]
// CHECK2: [[IF_END27]]:
// CHECK2-NEXT: br label %[[FOR_INC:.*]]
// CHECK2: [[FOR_INC]]:
-// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK2: [[FOR_END]]:
@@ -1114,13 +1364,11 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[ENTRY:.*:]]
// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -1130,48 +1378,43 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[__END224:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END222:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN225:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4
@@ -1198,225 +1441,219 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
-// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8
+// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
// CHECK2-NEXT: store i32 42, ptr [[C]], align 4
// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK2-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
-// CHECK2-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8
-// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8
-// CHECK2-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0
-// CHECK2-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8
-// CHECK2-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8
-// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8
-// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
-// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64
+// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
-// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
-// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1
-// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1
-// CHECK2-NEXT: [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1
-// CHECK2-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8
-// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8
-// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8
-// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8
-// CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1
-// CHECK2-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8
+// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4
-// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8
-// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK2-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
-// CHECK2-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256
-// CHECK2-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8
-// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0
-// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8
-// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8
-// CHECK2-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0
-// CHECK2-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8
-// CHECK2-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8
-// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8
-// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK2-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64
-// CHECK2-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64
-// CHECK2-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]]
-// CHECK2-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8
-// CHECK2-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1
-// CHECK2-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1
-// CHECK2-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1
-// CHECK2-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1
-// CHECK2-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK2-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8
+// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8
+// CHECK2-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK2-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]]
+// CHECK2-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8
+// CHECK2-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1
+// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1
+// CHECK2-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1
+// CHECK2-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1
+// CHECK2-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8
// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8
// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8
-// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8
-// CHECK2-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1
-// CHECK2-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8
-// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
-// CHECK2-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK2-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]]
-// CHECK2-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]]
-// CHECK2: [[COND_TRUE44]]:
-// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8
-// CHECK2-NEXT: br label %[[COND_END46:.*]]
-// CHECK2: [[COND_FALSE45]]:
-// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK2-NEXT: br label %[[COND_END46]]
-// CHECK2: [[COND_END46]]:
-// CHECK2-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ]
-// CHECK2-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK2-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]]
-// CHECK2-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]]
-// CHECK2: [[COND_TRUE50]]:
-// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
-// CHECK2-NEXT: br label %[[COND_END52:.*]]
-// CHECK2: [[COND_FALSE51]]:
-// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK2-NEXT: br label %[[COND_END52]]
-// CHECK2: [[COND_END52]]:
-// CHECK2-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ]
-// CHECK2-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8
-// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1
+// CHECK2-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]]
+// CHECK2: [[COND_TRUE42]]:
+// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT: br label %[[COND_END44:.*]]
+// CHECK2: [[COND_FALSE43]]:
+// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: br label %[[COND_END44]]
+// CHECK2: [[COND_END44]]:
+// CHECK2-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ]
+// CHECK2-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]]
+// CHECK2: [[COND_TRUE48]]:
+// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: br label %[[COND_END50:.*]]
+// CHECK2: [[COND_FALSE49]]:
+// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: br label %[[COND_END50]]
+// CHECK2: [[COND_END50]]:
+// CHECK2-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ]
+// CHECK2-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8
// CHECK2-NEXT: br label %[[FOR_COND:.*]]
// CHECK2: [[FOR_COND]]:
-// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8
-// CHECK2-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]]
-// CHECK2-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK2-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
// CHECK2: [[FOR_BODY]]:
-// CHECK2-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8
-// CHECK2-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]]
-// CHECK2-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]]
+// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]]
+// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]]
// CHECK2: [[IF_THEN]]:
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4
-// CHECK2-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4
-// CHECK2-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64
-// CHECK2-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]]
-// CHECK2-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]]
-// CHECK2-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32
-// CHECK2-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4
-// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4
-// CHECK2-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1
-// CHECK2-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]]
-// CHECK2-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]]
-// CHECK2-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]]
-// CHECK2: [[IF_THEN64]]:
-// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]]
-// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]]
-// CHECK2-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1
-// CHECK2-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]]
-// CHECK2-NEXT: store i32 [[ADD68]], ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[I]], align 4
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP48]])
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64
+// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]]
+// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]]
+// CHECK2-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32
+// CHECK2-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1
+// CHECK2-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]]
+// CHECK2-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]]
+// CHECK2-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN62]]:
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]]
+// CHECK2-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1
+// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]]
+// CHECK2-NEXT: store i32 [[ADD66]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP45]])
// CHECK2-NEXT: br label %[[IF_END]]
// CHECK2: [[IF_END]]:
-// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]]
-// CHECK2-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]]
-// CHECK2: [[IF_THEN70]]:
-// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]]
-// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]]
-// CHECK2-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2
-// CHECK2-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]]
-// CHECK2-NEXT: store i32 [[ADD74]], ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP55]])
-// CHECK2-NEXT: br label %[[IF_END75]]
-// CHECK2: [[IF_END75]]:
-// CHECK2-NEXT: br label %[[IF_END76]]
-// CHECK2: [[IF_END76]]:
-// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8
-// CHECK2-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]]
-// CHECK2-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]]
-// CHECK2: [[IF_THEN78]]:
-// CHECK2-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8
-// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8
-// CHECK2-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]]
-// CHECK2-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]]
-// CHECK2-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8
-// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8
-// CHECK2-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8
-// CHECK2-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1
-// CHECK2-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]]
-// CHECK2-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8
-// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
-// CHECK2-NEXT: store ptr [[TMP63]], ptr [[V]], align 8
-// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4
-// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8
-// CHECK2-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP64]], double noundef [[TMP66]])
-// CHECK2-NEXT: br label %[[IF_END83]]
-// CHECK2: [[IF_END83]]:
-// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
-// CHECK2-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]]
-// CHECK2-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]]
-// CHECK2: [[IF_THEN85]]:
-// CHECK2-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
-// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
-// CHECK2-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]]
-// CHECK2-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]]
-// CHECK2-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8
-// CHECK2-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
-// CHECK2-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
-// CHECK2-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1
-// CHECK2-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]]
-// CHECK2-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8
-// CHECK2-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8
-// CHECK2-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8
-// CHECK2-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4
-// CHECK2-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8
-// CHECK2-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP75]], double noundef [[TMP77]])
-// CHECK2-NEXT: br label %[[IF_END90]]
-// CHECK2: [[IF_END90]]:
+// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
+// CHECK2-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]]
+// CHECK2: [[IF_THEN68]]:
+// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]]
+// CHECK2-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]]
+// CHECK2-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2
+// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]]
+// CHECK2-NEXT: store i32 [[ADD72]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK2-NEXT: br label %[[IF_END73]]
+// CHECK2: [[IF_END73]]:
+// CHECK2-NEXT: br label %[[IF_END74]]
+// CHECK2: [[IF_END74]]:
+// CHECK2-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]]
+// CHECK2: [[IF_THEN76]]:
+// CHECK2-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]]
+// CHECK2-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1
+// CHECK2-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]]
+// CHECK2-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: store ptr [[TMP60]], ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]])
+// CHECK2-NEXT: br label %[[IF_END81]]
+// CHECK2: [[IF_END81]]:
+// CHECK2-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]]
+// CHECK2-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]]
+// CHECK2: [[IF_THEN83]]:
+// CHECK2-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]]
+// CHECK2-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]]
+// CHECK2-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1
+// CHECK2-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]]
+// CHECK2-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]])
+// CHECK2-NEXT: br label %[[IF_END88]]
+// CHECK2: [[IF_END88]]:
// CHECK2-NEXT: br label %[[FOR_INC:.*]]
// CHECK2: [[FOR_INC]]:
-// CHECK2-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8
-// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1
-// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8
+// CHECK2-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1
+// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8
// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK2: [[FOR_END]]:
// CHECK2-NEXT: ret void
@@ -1427,13 +1664,11 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[ENTRY:.*:]]
// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -1448,12 +1683,10 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
// CHECK2-NEXT: store i32 0, ptr [[K]], align 4
-// CHECK2-NEXT: store i32 63, ptr [[DOTOMP_UB1]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
// CHECK2-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4
@@ -1573,6 +1806,277 @@ extern "C" void foo4() {
// CHECK2-NEXT: ret void
//
//
+// CHECK2-LABEL: define dso_local void @foo5(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END267:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
+// CHECK2-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]]
+// CHECK2: [[COND_TRUE24]]:
+// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT: br label %[[COND_END26:.*]]
+// CHECK2: [[COND_FALSE25]]:
+// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: br label %[[COND_END26]]
+// CHECK2: [[COND_END26]]:
+// CHECK2-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ]
+// CHECK2-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128
+// CHECK2-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP21]])
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND30:.*]]
+// CHECK2: [[FOR_COND30]]:
+// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK2-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]]
+// CHECK2: [[FOR_BODY32]]:
+// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64
+// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]]
+// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]]
+// CHECK2-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
+// CHECK2-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1
+// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]]
+// CHECK2-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN41]]:
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]]
+// CHECK2-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2
+// CHECK2-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]]
+// CHECK2-NEXT: store i32 [[ADD45]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP37]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]]
+// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK2: [[IF_THEN47]]:
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]]
+// CHECK2-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]]
+// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1
+// CHECK2-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]]
+// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK2-NEXT: br label %[[IF_END52]]
+// CHECK2: [[IF_END52]]:
+// CHECK2-NEXT: br label %[[IF_END53]]
+// CHECK2: [[IF_END53]]:
+// CHECK2-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]]
+// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]]
+// CHECK2: [[IF_THEN55]]:
+// CHECK2-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]]
+// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]]
+// CHECK2-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1
+// CHECK2-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]]
+// CHECK2-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: store ptr [[TMP52]], ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]])
+// CHECK2-NEXT: br label %[[IF_END60]]
+// CHECK2: [[IF_END60]]:
+// CHECK2-NEXT: br label %[[FOR_INC61:.*]]
+// CHECK2: [[FOR_INC61]]:
+// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1
+// CHECK2-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK2: [[FOR_END63]]:
+// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8
+// CHECK2-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND70:.*]]
+// CHECK2: [[FOR_COND70]]:
+// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8
+// CHECK2-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]]
+// CHECK2-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]]
+// CHECK2: [[FOR_BODY72]]:
+// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]])
+// CHECK2-NEXT: br label %[[FOR_INC73:.*]]
+// CHECK2: [[FOR_INC73]]:
+// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1
+// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND70]]
+// CHECK2: [[FOR_END74]]:
+// CHECK2-NEXT: ret void
+//
+//
// CHECK2-LABEL: define dso_local void @tfoo2(
// CHECK2-SAME: ) #[[ATTR0]] {
// CHECK2-NEXT: [[ENTRY:.*:]]
@@ -1593,7 +2097,6 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
@@ -1602,7 +2105,6 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
@@ -1611,7 +2113,6 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4
@@ -1641,174 +2142,168 @@ extern "C" void foo4() {
// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
-// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]]
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]]
+// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1
// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4
-// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
// CHECK2-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
-// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4
-// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
-// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]]
// CHECK2-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1
-// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]]
-// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]]
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]]
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]]
// CHECK2-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1
// CHECK2-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK2-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1
// CHECK2-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]]
// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
// CHECK2: [[COND_TRUE]]:
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
// CHECK2-NEXT: br label %[[COND_END:.*]]
// CHECK2: [[COND_FALSE]]:
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
// CHECK2-NEXT: br label %[[COND_END]]
// CHECK2: [[COND_END]]:
-// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ]
// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
-// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]]
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]]
// CHECK2-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
// CHECK2: [[COND_TRUE30]]:
-// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
// CHECK2-NEXT: br label %[[COND_END32:.*]]
// CHECK2: [[COND_FALSE31]]:
-// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
// CHECK2-NEXT: br label %[[COND_END32]]
// CHECK2: [[COND_END32]]:
-// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ]
+// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ]
// CHECK2-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
// CHECK2-NEXT: br label %[[FOR_COND:.*]]
// CHECK2: [[FOR_COND]]:
-// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
-// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]]
// CHECK2-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
// CHECK2: [[FOR_BODY]]:
-// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
-// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]]
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]]
// CHECK2-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
// CHECK2: [[IF_THEN]]:
-// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
-// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
-// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]]
-// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]]
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]]
+// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]]
// CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
-// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
-// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]]
-// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]]
+// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]]
// CHECK2-NEXT: store i32 [[ADD38]], ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP49]])
// CHECK2-NEXT: br label %[[IF_END]]
// CHECK2: [[IF_END]]:
-// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
-// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]]
// CHECK2-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
// CHECK2: [[IF_THEN40]]:
-// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
-// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
-// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]]
-// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]]
+// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]]
// CHECK2-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
-// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
-// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
-// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]]
-// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]]
+// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]]
// CHECK2-NEXT: store i32 [[SUB44]], ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP61]])
+// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP58]])
// CHECK2-NEXT: br label %[[IF_END45]]
// CHECK2: [[IF_END45]]:
-// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
-// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]]
+// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]]
// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
// CHECK2: [[IF_THEN47]]:
-// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
-// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
-// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]]
-// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]]
+// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]]
+// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]]
// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
-// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
-// CHECK2-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
-// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]]
-// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]]
+// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]]
// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
-// CHECK2-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4
-// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP70]])
+// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP67]])
// CHECK2-NEXT: br label %[[IF_END52]]
// CHECK2: [[IF_END52]]:
// CHECK2-NEXT: br label %[[FOR_INC:.*]]
// CHECK2: [[FOR_INC]]:
-// CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1
+// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
-// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK2: [[FOR_END]]:
// CHECK2-NEXT: ret void
//
@@ -1819,6 +2314,8 @@ extern "C" void foo4() {
// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
+// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]}
+// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]}
//.
// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"}
@@ -1826,4 +2323,6 @@ extern "C" void foo4() {
// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
+// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]}
+// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]}
//.
>From 0d90fa9bbeb6ea0d35ceaa7ef27a42463f257320 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:44:48 +0000
Subject: [PATCH 5/7] Fixed missing diagnostic groups in warnings
---
clang/include/clang/Basic/DiagnosticSemaKinds.td | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index ecfb0c83a3851..94d1f3c3e6349 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11517,7 +11517,8 @@ def note_omp_implicit_dsa : Note<
def err_omp_loop_var_dsa : Error<
"loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">;
def warn_omp_different_loop_ind_var_types : Warning <
- "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">;
+ "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">,
+ InGroup<OpenMPLoopForm>;
def err_omp_not_canonical_loop : Error <
"loop after '#pragma omp %0' is not in canonical form">;
def err_omp_not_a_loop_sequence : Error <
@@ -11528,7 +11529,8 @@ def err_omp_invalid_looprange : Error <
"loop range in '#pragma omp %0' exceeds the number of available loops: "
"range end '%1' is greater than the total number of loops '%2'">;
def warn_omp_redundant_fusion : Warning <
- "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">;
+ "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">,
+ InGroup<OpenMPClauses>;
def err_omp_not_for : Error<
"%select{statement after '#pragma omp %1' must be a for loop|"
"expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">;
>From 07e7dc817c0862d910599ccae7c5057f72cf7fef Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:49:50 +0000
Subject: [PATCH 6/7] Fixed formatting and comments
---
clang/lib/Sema/SemaOpenMP.cpp | 112 ++++++++++++++++++----------------
1 file changed, 58 insertions(+), 54 deletions(-)
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 30f8cd3087268..e6557fe9e2187 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -14128,42 +14128,43 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective(
}
// Overloaded base case function
-template <typename T, typename F>
-static bool tryHandleAs(T *t, F &&) {
- return false;
+template <typename T, typename F> static bool tryHandleAs(T *t, F &&) {
+ return false;
}
/**
- * Tries to recursively cast `t` to one of the given types and invokes `f` if successful.
+ * Tries to recursively cast `t` to one of the given types and invokes `f` if
+ * successful.
*
* @tparam Class The first type to check.
* @tparam Rest The remaining types to check.
* @tparam T The base type of `t`.
- * @tparam F The callable type for the function to invoke upon a successful cast.
+ * @tparam F The callable type for the function to invoke upon a successful
+ * cast.
* @param t The object to be checked.
* @param f The function to invoke if `t` matches `Class`.
* @return `true` if `t` matched any type and `f` was called, otherwise `false`.
*/
template <typename Class, typename... Rest, typename T, typename F>
static bool tryHandleAs(T *t, F &&f) {
- if (Class *c = dyn_cast<Class>(t)) {
- f(c);
- return true;
- } else {
- return tryHandleAs<Rest...>(t, std::forward<F>(f));
- }
+ if (Class *c = dyn_cast<Class>(t)) {
+ f(c);
+ return true;
+ } else {
+ return tryHandleAs<Rest...>(t, std::forward<F>(f));
+ }
}
// Updates OriginalInits by checking Transform against loop transformation
// directives and appending their pre-inits if a match is found.
static void updatePreInits(OMPLoopBasedDirective *Transform,
SmallVectorImpl<SmallVector<Stmt *, 0>> &PreInits) {
- if (!tryHandleAs<OMPTileDirective, OMPUnrollDirective, OMPReverseDirective,
- OMPInterchangeDirective, OMPFuseDirective>(
- Transform, [&PreInits](auto *Dir) {
- appendFlattenedStmtList(PreInits.back(), Dir->getPreInits());
- }))
- llvm_unreachable("Unhandled loop transformation");
+ if (!tryHandleAs<OMPTileDirective, OMPUnrollDirective, OMPReverseDirective,
+ OMPInterchangeDirective, OMPFuseDirective>(
+ Transform, [&PreInits](auto *Dir) {
+ appendFlattenedStmtList(PreInits.back(), Dir->getPreInits());
+ }))
+ llvm_unreachable("Unhandled loop transformation");
}
bool SemaOpenMP::checkTransformableLoopNest(
@@ -14241,43 +14242,42 @@ class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor {
unsigned getNestedLoopCount() const { return NestedLoopCount; }
bool VisitForStmt(ForStmt *FS) override {
- ++NestedLoopCount;
- return true;
+ ++NestedLoopCount;
+ return true;
}
bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override {
- ++NestedLoopCount;
- return true;
+ ++NestedLoopCount;
+ return true;
}
bool TraverseStmt(Stmt *S) override {
- if (!S)
+ if (!S)
return true;
- // Skip traversal of all expressions, including special cases like
- // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions
- // may contain inner statements (and even loops), but they are not part
- // of the syntactic body of the surrounding loop structure.
- // Therefore must not be counted
- if (isa<Expr>(S))
+ // Skip traversal of all expressions, including special cases like
+ // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions
+ // may contain inner statements (and even loops), but they are not part
+ // of the syntactic body of the surrounding loop structure.
+ // Therefore, they must not be counted.
+ if (isa<Expr>(S))
return true;
- // Only recurse into CompoundStmt (block {}) and loop bodies
- if (isa<CompoundStmt>(S) || isa<ForStmt>(S) ||
- isa<CXXForRangeStmt>(S)) {
+ // Only recurse into CompoundStmt (block {}) and loop bodies
+ if (isa<CompoundStmt>(S) || isa<ForStmt>(S) || isa<CXXForRangeStmt>(S)) {
return DynamicRecursiveASTVisitor::TraverseStmt(S);
- }
+ }
- // Stop traversal of the rest of statements, that break perfect
- // loop nesting, such as control flow (IfStmt, SwitchStmt...)
- return true;
+ // Do not descend into any other statements; they break perfect
+ // loop nesting (e.g. control flow such as IfStmt, SwitchStmt...)
+ return true;
}
bool TraverseDecl(Decl *D) override {
- // Stop in the case of finding a declaration, it is not important
- // in order to find nested loops (Possible CXXRecordDecl, RecordDecl,
- // FunctionDecl...)
- return true;
+ // Do not descend into declarations; they are irrelevant when
+ // searching for nested loops (e.g. CXXRecordDecl, RecordDecl,
+ // FunctionDecl...)
+ return true;
}
};
@@ -14435,15 +14435,14 @@ bool SemaOpenMP::analyzeLoopSequence(
return isa<OMPLoopTransformationDirective>(Child);
};
-
// High level grammar validation
for (auto *Child : LoopSeqStmt->children()) {
- if (!Child)
+ if (!Child)
continue;
- // Skip over non-loop-sequence statements
- if (!isLoopSequenceDerivation(Child)) {
+ // Skip over non-loop-sequence statements
+ if (!isLoopSequenceDerivation(Child)) {
Child = Child->IgnoreContainers();
// Ignore empty compound statement
@@ -14461,9 +14460,9 @@ bool SemaOpenMP::analyzeLoopSequence(
// Already been treated, skip this children
continue;
}
- }
- // Regular loop sequence handling
- if (isLoopSequenceDerivation(Child)) {
+ }
+ // Regular loop sequence handling
+ if (isLoopSequenceDerivation(Child)) {
if (isLoopGeneratingStmt(Child)) {
if (!analyzeLoopGeneration(Child)) {
return false;
@@ -14477,12 +14476,12 @@ bool SemaOpenMP::analyzeLoopSequence(
// Update the Loop Sequence size by one
++LoopSeqSize;
}
- } else {
+ } else {
// Report error for invalid statement inside canonical loop sequence
Diag(Child->getBeginLoc(), diag::err_omp_not_for)
<< 0 << getOpenMPDirectiveName(Kind);
return false;
- }
+ }
}
return true;
}
@@ -14499,9 +14498,9 @@ bool SemaOpenMP::checkTransformableLoopSequence(
// Checks whether the given statement is a compound statement
if (!isa<CompoundStmt>(AStmt)) {
- Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
- << getOpenMPDirectiveName(Kind);
- return false;
+ Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence)
+ << getOpenMPDirectiveName(Kind);
+ return false;
}
// Number of top level canonical loop nests observed (And acts as index)
LoopSeqSize = 0;
@@ -14532,7 +14531,7 @@ bool SemaOpenMP::checkTransformableLoopSequence(
OriginalInits, TransformsPreInits,
LoopSequencePreInits, LoopCategories, Context,
Kind)) {
- return false;
+ return false;
}
if (LoopSeqSize <= 0) {
Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence)
@@ -15233,7 +15232,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
Stmt *LoopStmt = nullptr;
collectLoopStmts(AStmt, {LoopStmt});
- // Determine the PreInit declarations.e
+ // Determine the PreInit declarations.
SmallVector<Stmt *, 4> PreInits;
addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits);
@@ -15848,13 +15847,18 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
CountVal = CountInt.getZExtValue();
};
- // Checks if the loop range is valid
+ // OpenMP [6.0, Restrictions]
+ // first + count - 1 must not evaluate to a value greater than the
+ // loop sequence length of the associated canonical loop sequence.
auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal,
unsigned NumLoops) -> bool {
return FirstVal + CountVal - 1 <= NumLoops;
};
uint64_t FirstVal = 1, CountVal = 0, LastVal = LoopSeqSize;
+ // Validates the loop range after evaluating the semantic information
+ // and ensures that the range is valid for the given loop sequence size.
+ // Expressions are evaluated at compile time to obtain constant values.
if (LRC) {
EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal,
CountVal);
>From 78cec6d0600464d3336cdf7af19beffa12025474 Mon Sep 17 00:00:00 2001
From: eZWALT <waltertheshadow333 at gmail.com>
Date: Fri, 9 May 2025 10:58:54 +0000
Subject: [PATCH 7/7] Added minimal changes to enable flang future
implementation
---
flang/include/flang/Parser/dump-parse-tree.h | 1 +
flang/include/flang/Parser/parse-tree.h | 9 +++++++++
flang/lib/Lower/OpenMP/Clauses.cpp | 5 +++++
flang/lib/Lower/OpenMP/Clauses.h | 1 +
flang/lib/Parser/openmp-parsers.cpp | 7 +++++++
flang/lib/Parser/unparse.cpp | 7 +++++++
flang/lib/Semantics/check-omp-structure.cpp | 9 +++++++++
llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 +
8 files changed, 40 insertions(+)
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index a3721bc8410ba..4f2d715ba6a2e 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -608,6 +608,7 @@ class ParseTreeDumper {
NODE(OmpLinearClause, Modifier)
NODE(parser, OmpLinearModifier)
NODE_ENUM(OmpLinearModifier, Value)
+ NODE(parser, OmpLoopRangeClause)
NODE(parser, OmpStepComplexModifier)
NODE(parser, OmpStepSimpleModifier)
NODE(parser, OmpLoopDirective)
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index a0d7a797e7203..ae120ca20f686 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4361,6 +4361,15 @@ struct OmpLinearClause {
std::tuple<OmpObjectList, MODIFIERS(), /*PostModified=*/bool> t;
};
+// Ref: [6.0:207-208]
+//
+// loop-range-clause ->
+// LOOPRANGE(first, count) // since 6.0
+struct OmpLoopRangeClause {
+ TUPLE_CLASS_BOILERPLATE(OmpLoopRangeClause);
+ std::tuple<ScalarIntConstantExpr, ScalarIntConstantExpr> t;
+};
+
// Ref: [4.5:216-219], [5.0:315-324], [5.1:347-355], [5.2:150-158]
//
// map-clause ->
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index c258bef2e4427..d26733138fa4f 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -998,6 +998,11 @@ Link make(const parser::OmpClause::Link &inp,
return Link{/*List=*/makeObjects(inp.v, semaCtx)};
}
+LoopRange make(const parser::OmpClause::Looprange &inp,
+ semantics::SemanticsContext &semaCtx) {
+ llvm_unreachable("Unimplemented: looprange");
+}
+
Map make(const parser::OmpClause::Map &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpMapClause
diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h
index d7ab21d428e32..bda8571e65f23 100644
--- a/flang/lib/Lower/OpenMP/Clauses.h
+++ b/flang/lib/Lower/OpenMP/Clauses.h
@@ -239,6 +239,7 @@ using Initializer = tomp::clause::InitializerT<TypeTy, IdTy, ExprTy>;
using InReduction = tomp::clause::InReductionT<TypeTy, IdTy, ExprTy>;
using IsDevicePtr = tomp::clause::IsDevicePtrT<TypeTy, IdTy, ExprTy>;
using Lastprivate = tomp::clause::LastprivateT<TypeTy, IdTy, ExprTy>;
+using LoopRange = tomp::clause::LoopRangeT<TypeTy, IdTy, ExprTy>;
using Linear = tomp::clause::LinearT<TypeTy, IdTy, ExprTy>;
using Link = tomp::clause::LinkT<TypeTy, IdTy, ExprTy>;
using Map = tomp::clause::MapT<TypeTy, IdTy, ExprTy>;
diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp
index c4728e0fabe61..17fffa83d5af1 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -837,6 +837,11 @@ TYPE_PARSER(
maybe(":"_tok >> nonemptyList(Parser<OmpLinearClause::Modifier>{})),
/*PostModified=*/pure(true)))
+TYPE_PARSER(
+ construct<OmpLoopRangeClause>(scalarIntConstantExpr,
+ "," >> scalarIntConstantExpr)
+)
+
// OpenMPv5.2 12.5.2 detach-clause -> DETACH (event-handle)
TYPE_PARSER(construct<OmpDetachClause>(Parser<OmpObject>{}))
@@ -1006,6 +1011,8 @@ TYPE_PARSER( //
parenthesized(Parser<OmpLinearClause>{}))) ||
"LINK" >> construct<OmpClause>(construct<OmpClause::Link>(
parenthesized(Parser<OmpObjectList>{}))) ||
+ "LOOPRANGE" >> construct<OmpClause>(construct<OmpClause::Looprange>(
+ parenthesized(Parser<OmpLoopRangeClause>{}))) ||
"MAP" >> construct<OmpClause>(construct<OmpClause::Map>(
parenthesized(Parser<OmpMapClause>{}))) ||
"MATCH" >> construct<OmpClause>(construct<OmpClause::Match>(
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 1ee9096fcda56..bf7daa44c7bd6 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2309,6 +2309,13 @@ class UnparseVisitor {
}
}
}
+ void Unparse(const OmpLoopRangeClause &x) {
+ Word("LOOPRANGE(");
+ Walk(std::get<0>(x.t));
+ Put(", ");
+ Walk(std::get<1>(x.t));
+ Put(")");
+ }
void Unparse(const OmpReductionClause &x) {
using Modifier = OmpReductionClause::Modifier;
Walk(std::get<std::optional<std::list<Modifier>>>(x.t), ": ");
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index dd8e511642976..fc9e2e32d6639 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -3368,6 +3368,15 @@ CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Collapse, OMPC_collapse)
CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Safelen, OMPC_safelen)
CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Simdlen, OMPC_simdlen)
+void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) {
+ context_.Say(GetContext().clauseSource,
+ "LOOPRANGE clause is not implemented yet"_err_en_US,
+ ContextDirectiveAsFortran());
+}
+
+void OmpStructureChecker::Enter(const parser::OmpClause::FreeAgent &x) {
+ context_.Say(GetContext().clauseSource,
+ "FREE_AGENT clause is not implemented yet"_err_en_US,
// Restrictions specific to each clause are implemented apart from the
// generalized restrictions.
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 366cc7ef853d3..491cd47dc2902 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -273,6 +273,7 @@ def OMPC_Link : Clause<"link"> {
}
def OMPC_LoopRange : Clause<"looprange"> {
let clangClass = "OMPLoopRangeClause";
+ let flangClass = "OmpLoopRangeClause";
}
def OMPC_Map : Clause<"map"> {
let clangClass = "OMPMapClause";
More information about the llvm-commits
mailing list