[clang] [Clang][OpenMP] Allow `num_teams` to accept multiple expressions (PR #99732)
Shilei Tian via cfe-commits
cfe-commits at lists.llvm.org
Sun Jul 28 19:24:28 PDT 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/99732
>From 37c28dba5566ee39dfd67bf31b24cc7f0427ff03 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 19 Jul 2024 22:07:06 -0400
Subject: [PATCH] [Clang][OpenMP] Allow `num_teams` to accept multiple
expressions
---
clang/include/clang/AST/OpenMPClause.h | 79 ++-
clang/include/clang/AST/RecursiveASTVisitor.h | 2 +-
clang/include/clang/Sema/SemaOpenMP.h | 3 +-
clang/lib/AST/OpenMPClause.cpp | 26 +-
clang/lib/AST/StmtProfile.cpp | 3 +-
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 7 +-
clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 +-
clang/lib/Parse/ParseOpenMP.cpp | 8 +-
clang/lib/Sema/SemaOpenMP.cpp | 53 +-
clang/lib/Sema/TreeTransform.h | 7 +-
clang/lib/Serialization/ASTReader.cpp | 10 +-
clang/lib/Serialization/ASTWriter.cpp | 4 +-
.../target_ompx_dyn_cgroup_mem_codegen.cpp | 544 ++++++++++--------
clang/test/OpenMP/target_teams_codegen.cpp | 40 +-
clang/tools/libclang/CIndex.cpp | 2 +-
15 files changed, 478 insertions(+), 312 deletions(-)
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index 325a1baa44614..2735879c56958 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -6131,43 +6131,54 @@ class OMPMapClause final : public OMPMappableExprListClause<OMPMapClause>,
/// \endcode
/// In this example directive '#pragma omp teams' has clause 'num_teams'
/// with single expression 'n'.
-class OMPNumTeamsClause : public OMPClause, public OMPClauseWithPreInit {
- friend class OMPClauseReader;
+///
+/// When 'ompx_bare' clause exists on a 'target' directive, 'num_teams' clause
+/// can accept up to three expressions.
+///
+/// \code
+/// #pragma omp target teams ompx_bare num_teams(x, y, z)
+/// \endcode
+class OMPNumTeamsClause final
+ : public OMPVarListClause<OMPNumTeamsClause>,
+ public OMPClauseWithPreInit,
+ private llvm::TrailingObjects<OMPNumTeamsClause, Expr *> {
+ friend OMPVarListClause;
+ friend TrailingObjects;
/// Location of '('.
SourceLocation LParenLoc;
- /// NumTeams number.
- Stmt *NumTeams = nullptr;
+ OMPNumTeamsClause(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation EndLoc, unsigned N)
+ : OMPVarListClause(llvm::omp::OMPC_num_teams, StartLoc, LParenLoc, EndLoc,
+ N),
+ OMPClauseWithPreInit(this) {}
- /// Set the NumTeams number.
- ///
- /// \param E NumTeams number.
- void setNumTeams(Expr *E) { NumTeams = E; }
+ /// Build an empty clause.
+ OMPNumTeamsClause(unsigned N)
+ : OMPVarListClause(llvm::omp::OMPC_num_teams, SourceLocation(),
+ SourceLocation(), SourceLocation(), N),
+ OMPClauseWithPreInit(this) {}
public:
- /// Build 'num_teams' clause.
+ /// Creates clause with a list of variables \a VL.
///
- /// \param E Expression associated with this clause.
- /// \param HelperE Helper Expression associated with this clause.
- /// \param CaptureRegion Innermost OpenMP region where expressions in this
- /// clause must be captured.
+ /// \param C AST context.
/// \param StartLoc Starting location of the clause.
/// \param LParenLoc Location of '('.
/// \param EndLoc Ending location of the clause.
- OMPNumTeamsClause(Expr *E, Stmt *HelperE, OpenMPDirectiveKind CaptureRegion,
- SourceLocation StartLoc, SourceLocation LParenLoc,
- SourceLocation EndLoc)
- : OMPClause(llvm::omp::OMPC_num_teams, StartLoc, EndLoc),
- OMPClauseWithPreInit(this), LParenLoc(LParenLoc), NumTeams(E) {
- setPreInitStmt(HelperE, CaptureRegion);
- }
+ /// \param VL List of references to the variables.
+ /// \param PreInit
+ static OMPNumTeamsClause *Create(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc, ArrayRef<Expr *> VL,
+ Stmt *PreInit);
- /// Build an empty clause.
- OMPNumTeamsClause()
- : OMPClause(llvm::omp::OMPC_num_teams, SourceLocation(),
- SourceLocation()),
- OMPClauseWithPreInit(this) {}
+ /// Creates an empty clause with \a N variables.
+ ///
+ /// \param C AST context.
+ /// \param N The number of variables.
+ static OMPNumTeamsClause *CreateEmpty(const ASTContext &C, unsigned N);
/// Sets the location of '('.
void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
@@ -6175,16 +6186,22 @@ class OMPNumTeamsClause : public OMPClause, public OMPClauseWithPreInit {
/// Returns the location of '('.
SourceLocation getLParenLoc() const { return LParenLoc; }
- /// Return NumTeams number.
- Expr *getNumTeams() { return cast<Expr>(NumTeams); }
+ /// Return NumTeams expressions.
+ ArrayRef<Expr *> getNumTeams() { return getVarRefs(); }
- /// Return NumTeams number.
- Expr *getNumTeams() const { return cast<Expr>(NumTeams); }
+ /// Return NumTeams expressions.
+ ArrayRef<Expr *> getNumTeams() const {
+ return const_cast<OMPNumTeamsClause *>(this)->getNumTeams();
+ }
- child_range children() { return child_range(&NumTeams, &NumTeams + 1); }
+ child_range children() {
+ return child_range(reinterpret_cast<Stmt **>(varlist_begin()),
+ reinterpret_cast<Stmt **>(varlist_end()));
+ }
const_child_range children() const {
- return const_child_range(&NumTeams, &NumTeams + 1);
+ auto Children = const_cast<OMPNumTeamsClause *>(this)->children();
+ return const_child_range(Children.begin(), Children.end());
}
child_range used_children() {
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index e3c0cb46799f7..beb7b3597c2a8 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3793,8 +3793,8 @@ bool RecursiveASTVisitor<Derived>::VisitOMPMapClause(OMPMapClause *C) {
template <typename Derived>
bool RecursiveASTVisitor<Derived>::VisitOMPNumTeamsClause(
OMPNumTeamsClause *C) {
+ TRY_TO(VisitOMPClauseList(C));
TRY_TO(VisitOMPClauseWithPreInit(C));
- TRY_TO(TraverseStmt(C->getNumTeams()));
return true;
}
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index aa61dae9415e2..703c1511fc3ae 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -1226,7 +1226,8 @@ class SemaOpenMP : public SemaBase {
const OMPVarListLocTy &Locs, bool NoDiagnose = false,
ArrayRef<Expr *> UnresolvedMappers = std::nullopt);
/// Called on well-formed 'num_teams' clause.
- OMPClause *ActOnOpenMPNumTeamsClause(Expr *NumTeams, SourceLocation StartLoc,
+ OMPClause *ActOnOpenMPNumTeamsClause(ArrayRef<Expr *> VarList,
+ SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc);
/// Called on well-formed 'thread_limit' clause.
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 042a5df5906ca..ee9e9a0d39a92 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -1720,6 +1720,24 @@ const Expr *OMPDoacrossClause::getLoopData(unsigned NumLoop) const {
return *It;
}
+OMPNumTeamsClause *
+OMPNumTeamsClause::Create(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation EndLoc,
+ ArrayRef<Expr *> VL, Stmt *PreInit) {
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(VL.size()));
+ OMPNumTeamsClause *Clause =
+ new (Mem) OMPNumTeamsClause(C, StartLoc, LParenLoc, EndLoc, VL.size());
+ Clause->setVarRefs(VL);
+ Clause->setPreInitStmt(PreInit);
+ return Clause;
+}
+
+OMPNumTeamsClause *OMPNumTeamsClause::CreateEmpty(const ASTContext &C,
+ unsigned N) {
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(N));
+ return new (Mem) OMPNumTeamsClause(N);
+}
+
//===----------------------------------------------------------------------===//
// OpenMP clauses printing methods
//===----------------------------------------------------------------------===//
@@ -1977,9 +1995,11 @@ void OMPClausePrinter::VisitOMPDeviceClause(OMPDeviceClause *Node) {
}
void OMPClausePrinter::VisitOMPNumTeamsClause(OMPNumTeamsClause *Node) {
- OS << "num_teams(";
- Node->getNumTeams()->printPretty(OS, nullptr, Policy, 0);
- OS << ")";
+ if (!Node->varlist_empty()) {
+ OS << "num_teams";
+ VisitOMPClauseList(Node, '(');
+ OS << ")";
+ }
}
void OMPClausePrinter::VisitOMPThreadLimitClause(OMPThreadLimitClause *Node) {
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 89d2a422509d8..b782a4ab8367e 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -843,9 +843,8 @@ void OMPClauseProfiler::VisitOMPAllocateClause(const OMPAllocateClause *C) {
VisitOMPClauseList(C);
}
void OMPClauseProfiler::VisitOMPNumTeamsClause(const OMPNumTeamsClause *C) {
+ VisitOMPClauseList(C);
VistOMPClauseWithPreInit(C);
- if (C->getNumTeams())
- Profiler->VisitStmt(C->getNumTeams());
}
void OMPClauseProfiler::VisitOMPThreadLimitClause(
const OMPThreadLimitClause *C) {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index ec644acf68a20..ed65df85b6729 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6036,8 +6036,9 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
- const Expr *NumTeams =
- NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
+ const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
+ ->getNumTeams()
+ .front();
if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
if (auto Constant =
NumTeams->getIntegerConstantExpr(CGF.getContext()))
@@ -6062,7 +6063,7 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
case OMPD_target_teams_distribute_parallel_for_simd: {
if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
const Expr *NumTeams =
- D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
+ D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 4ee9840f12e06..d461d2ab9ab5b 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -6859,7 +6859,7 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
if (NT || TL) {
- const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
+ const Expr *NumTeams = NT ? NT->getNumTeams().front() : nullptr;
const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index e975e96c5c7e4..50930aa0e9a4a 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -3098,7 +3098,6 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
case OMPC_simdlen:
case OMPC_collapse:
case OMPC_ordered:
- case OMPC_num_teams:
case OMPC_thread_limit:
case OMPC_priority:
case OMPC_grainsize:
@@ -3252,6 +3251,13 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
? ParseOpenMPSimpleClause(CKind, WrongDirective)
: ParseOpenMPClause(CKind, WrongDirective);
break;
+ case OMPC_num_teams:
+ if (!FirstClause) {
+ Diag(Tok, diag::err_omp_more_one_clause)
+ << getOpenMPDirectiveName(DKind) << getOpenMPClauseName(CKind) << 0;
+ ErrorFound = true;
+ }
+ [[clang::fallthrough]];
case OMPC_private:
case OMPC_firstprivate:
case OMPC_lastprivate:
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 9c80b3eec914c..c982c07682ec7 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -13785,6 +13785,16 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDirective(
return StmtError();
}
+ auto NumTeamsClauseItr =
+ llvm::find_if(Clauses, llvm::IsaPred<OMPNumTeamsClause>);
+ if (NumTeamsClauseItr != Clauses.end()) {
+ ArrayRef<const Expr *> NumTeams =
+ cast<OMPNumTeamsClause>(*NumTeamsClauseItr)->getNumTeams();
+ if (!HasBareClause && NumTeams.size() > 1) {
+ return StmtError();
+ }
+ }
+
return OMPTargetTeamsDirective::Create(getASTContext(), StartLoc, EndLoc,
Clauses, AStmt);
}
@@ -14925,9 +14935,6 @@ OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind,
case OMPC_ordered:
Res = ActOnOpenMPOrderedClause(StartLoc, EndLoc, LParenLoc, Expr);
break;
- case OMPC_num_teams:
- Res = ActOnOpenMPNumTeamsClause(Expr, StartLoc, LParenLoc, EndLoc);
- break;
case OMPC_thread_limit:
Res = ActOnOpenMPThreadLimitClause(Expr, StartLoc, LParenLoc, EndLoc);
break;
@@ -15031,6 +15038,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind,
case OMPC_affinity:
case OMPC_when:
case OMPC_bind:
+ case OMPC_num_teams:
default:
llvm_unreachable("Clause is not allowed.");
}
@@ -16894,6 +16902,9 @@ OMPClause *SemaOpenMP::ActOnOpenMPVarListClause(OpenMPClauseKind Kind,
static_cast<OpenMPDoacrossClauseModifier>(ExtraModifier),
ExtraModifierLoc, ColonLoc, VarList, StartLoc, LParenLoc, EndLoc);
break;
+ case OMPC_num_teams:
+ Res = ActOnOpenMPNumTeamsClause(VarList, StartLoc, LParenLoc, EndLoc);
+ break;
case OMPC_if:
case OMPC_depobj:
case OMPC_final:
@@ -16924,7 +16935,6 @@ OMPClause *SemaOpenMP::ActOnOpenMPVarListClause(OpenMPClauseKind Kind,
case OMPC_device:
case OMPC_threads:
case OMPC_simd:
- case OMPC_num_teams:
case OMPC_thread_limit:
case OMPC_priority:
case OMPC_grainsize:
@@ -21587,32 +21597,39 @@ const ValueDecl *SemaOpenMP::getOpenMPDeclareMapperVarName() const {
return cast<DeclRefExpr>(DSAStack->getDeclareMapperVarRef())->getDecl();
}
-OMPClause *SemaOpenMP::ActOnOpenMPNumTeamsClause(Expr *NumTeams,
+OMPClause *SemaOpenMP::ActOnOpenMPNumTeamsClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
- Expr *ValExpr = NumTeams;
- Stmt *HelperValStmt = nullptr;
-
- // OpenMP [teams Constrcut, Restrictions]
- // The num_teams expression must evaluate to a positive integer value.
- if (!isNonNegativeIntegerValue(ValExpr, SemaRef, OMPC_num_teams,
- /*StrictlyPositive=*/true))
+ if (VarList.empty())
return nullptr;
+ for (Expr *ValExpr : VarList) {
+ // OpenMP [teams Constrcut, Restrictions]
+ // The num_teams expression must evaluate to a positive integer value.
+ if (!isNonNegativeIntegerValue(ValExpr, SemaRef, OMPC_num_teams,
+ /*StrictlyPositive=*/true))
+ return nullptr;
+ }
+
OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective();
OpenMPDirectiveKind CaptureRegion = getOpenMPCaptureRegionForClause(
DKind, OMPC_num_teams, getLangOpts().OpenMP);
- if (CaptureRegion != OMPD_unknown &&
- !SemaRef.CurContext->isDependentContext()) {
+ if (CaptureRegion == OMPD_unknown || SemaRef.CurContext->isDependentContext())
+ return OMPNumTeamsClause::Create(getASTContext(), StartLoc, LParenLoc,
+ EndLoc, VarList, /*PreInit=*/nullptr);
+
+ llvm::MapVector<const Expr *, DeclRefExpr *> Captures;
+ SmallVector<Expr *, 3> Vars;
+ for (Expr *ValExpr : VarList) {
ValExpr = SemaRef.MakeFullExpr(ValExpr).get();
- llvm::MapVector<const Expr *, DeclRefExpr *> Captures;
ValExpr = tryBuildCapture(SemaRef, ValExpr, Captures).get();
- HelperValStmt = buildPreInits(getASTContext(), Captures);
+ Vars.push_back(ValExpr);
}
- return new (getASTContext()) OMPNumTeamsClause(
- ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, EndLoc);
+ Stmt *PreInit = buildPreInits(getASTContext(), Captures);
+ return OMPNumTeamsClause::Create(getASTContext(), StartLoc, LParenLoc, EndLoc,
+ Vars, PreInit);
}
OMPClause *SemaOpenMP::ActOnOpenMPThreadLimitClause(Expr *ThreadLimit,
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 4d68ebf0cc452..bad8bed396690 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -2065,10 +2065,11 @@ class TreeTransform {
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
- OMPClause *RebuildOMPNumTeamsClause(Expr *NumTeams, SourceLocation StartLoc,
+ OMPClause *RebuildOMPNumTeamsClause(ArrayRef<Expr *> VarList,
+ SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
- return getSema().OpenMP().ActOnOpenMPNumTeamsClause(NumTeams, StartLoc,
+ return getSema().OpenMP().ActOnOpenMPNumTeamsClause(VarList, StartLoc,
LParenLoc, EndLoc);
}
@@ -10872,7 +10873,7 @@ TreeTransform<Derived>::TransformOMPAllocateClause(OMPAllocateClause *C) {
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNumTeamsClause(OMPNumTeamsClause *C) {
- ExprResult E = getDerived().TransformExpr(C->getNumTeams());
+ ExprResult E = getDerived().TransformExpr(C->getNumTeams().front());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPNumTeamsClause(
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 3cb96df12e4da..1fa4d25523da2 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -104,6 +104,7 @@
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -10562,7 +10563,7 @@ OMPClause *OMPClauseReader::readClause() {
break;
}
case llvm::omp::OMPC_num_teams:
- C = new (Context) OMPNumTeamsClause();
+ C = OMPNumTeamsClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_thread_limit:
C = new (Context) OMPThreadLimitClause();
@@ -11350,8 +11351,13 @@ void OMPClauseReader::VisitOMPAllocateClause(OMPAllocateClause *C) {
void OMPClauseReader::VisitOMPNumTeamsClause(OMPNumTeamsClause *C) {
VisitOMPClauseWithPreInit(C);
- C->setNumTeams(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
+ unsigned NumVars = C->varlist_size();
+ SmallVector<Expr *, 16> Vars;
+ Vars.reserve(NumVars);
+ for ([[maybe_unused]] unsigned I : llvm::seq<unsigned>(NumVars))
+ Vars.push_back(Record.readSubExpr());
+ C->setVarRefs(Vars);
}
void OMPClauseReader::VisitOMPThreadLimitClause(OMPThreadLimitClause *C) {
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index c78d8943d6d92..ebc5306b4d399 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -7528,9 +7528,11 @@ void OMPClauseWriter::VisitOMPAllocateClause(OMPAllocateClause *C) {
}
void OMPClauseWriter::VisitOMPNumTeamsClause(OMPNumTeamsClause *C) {
+ Record.push_back(C->varlist_size());
VisitOMPClauseWithPreInit(C);
- Record.AddStmt(C->getNumTeams());
Record.AddSourceLocation(C->getLParenLoc());
+ for (auto *VE : C->varlists())
+ Record.AddStmt(VE);
}
void OMPClauseWriter::VisitOMPThreadLimitClause(OMPThreadLimitClause *C) {
diff --git a/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp b/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp
index 10c4ac38e6a89..d8d960c187cd3 100644
--- a/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp
+++ b/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp
@@ -653,6 +653,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
@@ -662,44 +663,50 @@ int bar(int n){
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined, i64 [[TMP3]])
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined, i64 [[TMP3]], i64 [[TMP5]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -707,11 +714,11 @@ int bar(int n){
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
+// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
@@ -725,8 +732,8 @@ int bar(int n){
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]]
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
+// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64
@@ -735,30 +742,33 @@ int bar(int n){
// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK1-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP11]]
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined.omp_outlined, i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]]), !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined.omp_outlined, i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]), !llvm.access.group [[ACC_GRP11]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
-// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]])
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
+// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0
-// CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1
-// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[I3]], align 4
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0
+// CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1
+// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD9]], ptr [[I4]], align 4
// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -767,60 +777,62 @@ int bar(int n){
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32
+// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32
// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
@@ -834,20 +846,20 @@ int bar(int n){
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]]
-// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
-// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]]
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -859,12 +871,12 @@ int bar(int n){
// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0
-// CHECK1-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1
-// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1
-// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK1-NEXT: store i32 [[ADD11]], ptr [[I4]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP21]], 0
+// CHECK1-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1
+// CHECK1-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1
+// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
+// CHECK1-NEXT: store i32 [[ADD12]], ptr [[I5]], align 4
// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -1021,6 +1033,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
@@ -1034,21 +1047,26 @@ int bar(int n){
// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2
// CHECK1-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2
// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i64 [[TMP4]], i64 [[TMP6]])
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2
+// CHECK1-NEXT: store i16 [[TMP7]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 2
+// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
@@ -1535,6 +1553,7 @@ int bar(int n){
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
@@ -1544,44 +1563,50 @@ int bar(int n){
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined, i32 [[TMP3]])
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined, i32 [[TMP3]], i32 [[TMP5]])
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]]) #[[ATTR1]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: omp.precond.then:
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -1589,11 +1614,11 @@ int bar(int n){
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
-// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
+// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
@@ -1607,38 +1632,41 @@ int bar(int n){
// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]]
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]]
-// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
-// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
+// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK3-NEXT: store i32 [[TMP15]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]]
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined.omp_outlined, i32 [[TMP13]], i32 [[TMP14]], i32 [[TMP16]]), !llvm.access.group [[ACC_GRP12]]
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP12]]
+// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP12]]
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP12]]
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined.omp_outlined, i32 [[TMP13]], i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]]), !llvm.access.group [[ACC_GRP12]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]]
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]]
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]]
+// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]]
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
-// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK3: .omp.final.then:
-// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP23]], 0
-// CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1
-// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD8]], ptr [[I3]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0
+// CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1
+// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK3-NEXT: store i32 [[ADD9]], ptr [[I4]], align 4
// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK3: .omp.final.done:
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
@@ -1647,42 +1675,44 @@ int bar(int n){
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]]) #[[ATTR1]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: omp.precond.then:
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
@@ -1694,11 +1724,11 @@ int bar(int n){
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
-// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
@@ -1712,20 +1742,20 @@ int bar(int n){
// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]]
// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
-// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
+// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -1737,12 +1767,12 @@ int bar(int n){
// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK3: .omp.final.then:
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP21]], 0
-// CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1
-// CHECK3-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1
-// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
-// CHECK3-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0
+// CHECK3-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1
+// CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1
+// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
+// CHECK3-NEXT: store i32 [[ADD11]], ptr [[I4]], align 4
// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK3: .omp.final.done:
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
@@ -1899,6 +1929,7 @@ int bar(int n){
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
@@ -1912,21 +1943,26 @@ int bar(int n){
// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2
// CHECK3-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i32 [[TMP4]], i32 [[TMP6]])
+// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2
+// CHECK3-NEXT: store i16 [[TMP7]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 2
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]])
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
@@ -1943,6 +1979,7 @@ int bar(int n){
// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
@@ -1953,44 +1990,50 @@ int bar(int n){
// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4
// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined, i64 [[TMP3]])
+// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined, i64 [[TMP3]], i64 [[TMP5]])
// CHECK9-NEXT: ret void
//
//
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK9-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
// CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK9: omp.precond.then:
// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -1998,11 +2041,11 @@ int bar(int n){
// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
-// CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
+// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK9: cond.true:
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK9-NEXT: br label [[COND_END:%.*]]
// CHECK9: cond.false:
// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
@@ -2016,8 +2059,8 @@ int bar(int n){
// CHECK9: omp.inner.for.cond:
// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]]
-// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
-// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
+// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK9-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64
@@ -2026,30 +2069,33 @@ int bar(int n){
// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK9-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP10]]
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined.omp_outlined, i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]]), !llvm.access.group [[ACC_GRP10]]
+// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP10]]
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined.omp_outlined, i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]), !llvm.access.group [[ACC_GRP10]]
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]]
-// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]]
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
+// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
-// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
-// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
+// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
+// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK9: .omp.final.then:
-// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0
-// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1
-// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD8]], ptr [[I3]], align 4
+// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0
+// CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1
+// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK9-NEXT: store i32 [[ADD9]], ptr [[I4]], align 4
// CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK9: .omp.final.done:
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
@@ -2058,60 +2104,62 @@ int bar(int n){
//
//
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4
// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK9-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
// CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK9: omp.precond.then:
// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4
// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32
// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32
+// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32
// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
+// CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4
// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
-// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
+// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK9: cond.true:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK9-NEXT: br label [[COND_END:%.*]]
// CHECK9: cond.false:
// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
@@ -2125,20 +2173,20 @@ int bar(int n){
// CHECK9: omp.inner.for.cond:
// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]]
// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
-// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
+// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2150,12 +2198,12 @@ int bar(int n){
// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK9: .omp.final.then:
-// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0
-// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1
-// CHECK9-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1
-// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK9-NEXT: store i32 [[ADD11]], ptr [[I4]], align 4
+// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP21]], 0
+// CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1
+// CHECK9-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1
+// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
+// CHECK9-NEXT: store i32 [[ADD12]], ptr [[I5]], align 4
// CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK9: .omp.final.done:
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
@@ -2265,6 +2313,7 @@ int bar(int n){
// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
@@ -2279,21 +2328,26 @@ int bar(int n){
// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2
// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2
// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i64 [[TMP4]], i64 [[TMP6]])
+// CHECK9-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2
+// CHECK9-NEXT: store i16 [[TMP7]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 2
+// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]])
// CHECK9-NEXT: ret void
//
//
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
+// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2
// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
@@ -2310,6 +2364,7 @@ int bar(int n){
// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
@@ -2320,44 +2375,50 @@ int bar(int n){
// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4
// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined, i32 [[TMP3]])
+// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined, i32 [[TMP3]], i32 [[TMP5]])
// CHECK11-NEXT: ret void
//
//
// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
// CHECK11-NEXT: entry:
// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK11-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK11: omp.precond.then:
// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -2365,11 +2426,11 @@ int bar(int n){
// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
-// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
+// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK11: cond.true:
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK11-NEXT: br label [[COND_END:%.*]]
// CHECK11: cond.false:
// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
@@ -2383,38 +2444,41 @@ int bar(int n){
// CHECK11: omp.inner.for.cond:
// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]]
// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
-// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
+// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK11-NEXT: store i32 [[TMP15]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined.omp_outlined, i32 [[TMP13]], i32 [[TMP14]], i32 [[TMP16]]), !llvm.access.group [[ACC_GRP11]]
+// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined.omp_outlined, i32 [[TMP13]], i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]]), !llvm.access.group [[ACC_GRP11]]
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
+// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
-// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
-// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
-// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
+// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
+// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK11: .omp.final.then:
-// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP23]], 0
-// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
-// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1
-// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK11-NEXT: store i32 [[ADD8]], ptr [[I3]], align 4
+// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0
+// CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1
+// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1
+// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK11-NEXT: store i32 [[ADD9]], ptr [[I4]], align 4
// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK11: .omp.final.done:
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
@@ -2423,42 +2487,44 @@ int bar(int n){
//
//
// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined.omp_outlined
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
// CHECK11-NEXT: entry:
// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4
// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK11-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK11: omp.precond.then:
// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4
// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
@@ -2470,11 +2536,11 @@ int bar(int n){
// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
-// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
+// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK11: cond.true:
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK11-NEXT: br label [[COND_END:%.*]]
// CHECK11: cond.false:
// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
@@ -2488,20 +2554,20 @@ int bar(int n){
// CHECK11: omp.inner.for.cond:
// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]]
-// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
-// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
+// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]]
-// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2513,12 +2579,12 @@ int bar(int n){
// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK11: .omp.final.then:
-// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP21]], 0
-// CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1
-// CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1
-// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
-// CHECK11-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
+// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0
+// CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1
+// CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1
+// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
+// CHECK11-NEXT: store i32 [[ADD11]], ptr [[I4]], align 4
// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK11: .omp.final.done:
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
@@ -2628,6 +2694,7 @@ int bar(int n){
// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -2642,21 +2709,26 @@ int bar(int n){
// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2
// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2
// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4
-// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i32 [[TMP4]], i32 [[TMP6]])
+// CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2
+// CHECK11-NEXT: store i16 [[TMP7]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 2
+// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]])
// CHECK11-NEXT: ret void
//
//
// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
// CHECK11-NEXT: entry:
// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2
// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp
index 24dc2fd2e49f4..932f2b5054ddd 100644
--- a/clang/test/OpenMP/target_teams_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_codegen.cpp
@@ -812,6 +812,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
@@ -822,19 +823,24 @@ int bar(int n){
// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined, i64 [[TMP4]])
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined, i64 [[TMP4]], i64 [[TMP6]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: ret void
//
//
@@ -2401,6 +2407,7 @@ int bar(int n){
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
@@ -2411,19 +2418,24 @@ int bar(int n){
// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined, i32 [[TMP4]])
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined, i32 [[TMP4]], i32 [[TMP6]])
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK3-NEXT: ret void
//
//
@@ -3493,6 +3505,7 @@ int bar(int n){
// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
@@ -3504,19 +3517,24 @@ int bar(int n){
// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2
// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined, i64 [[TMP4]])
+// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined, i64 [[TMP4]], i64 [[TMP6]])
// CHECK9-NEXT: ret void
//
//
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] {
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
+// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK9-NEXT: ret void
//
//
@@ -4054,6 +4072,7 @@ int bar(int n){
// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
@@ -4065,19 +4084,24 @@ int bar(int n){
// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2
// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4
-// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined, i32 [[TMP4]])
+// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined, i32 [[TMP4]], i32 [[TMP6]])
// CHECK11-NEXT: ret void
//
//
// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
+// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
// CHECK11-NEXT: entry:
// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK11-NEXT: ret void
//
//
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index d8af5abce7cee..0354426da6eef 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2499,8 +2499,8 @@ void OMPClauseEnqueue::VisitOMPDeviceClause(const OMPDeviceClause *C) {
}
void OMPClauseEnqueue::VisitOMPNumTeamsClause(const OMPNumTeamsClause *C) {
+ VisitOMPClauseList(C);
VisitOMPClauseWithPreInit(C);
- Visitor->AddStmt(C->getNumTeams());
}
void OMPClauseEnqueue::VisitOMPThreadLimitClause(
More information about the cfe-commits
mailing list