[clang] [OpenMP][CodeGen] Improved codegen for combined loop directives (PR #72417)
David Pagan via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 15 09:39:27 PST 2023
https://github.com/ddpagan created https://github.com/llvm/llvm-project/pull/72417
IR for 'target teams loop' is now dependent on suitability of associated loop-nest.
If a loop-nest:
- does not contain a function call, or
- the -fopenmp-assume-no-nested-parallelism has been specified,
- or the call is to an OpenMP API AND
- does not contain nested loop bind(parallel) directives
then it can be emitted as 'target teams distribute parallel for', which is the current default. Otherwise, it is emitted as 'target teams distribute'.
Added debug output indicating how 'target teams loop' was emitted. Flag is -mllvm -debug-only=target-teams-loop-codegen
Added LIT tests explicitly verifying 'target teams loop' emitted as a parallel loop and a distribute loop.
Updated other 'loop' related tests as needed to reflect change in IR.
- These updates account for most of the changed files and additions/deletions.
>From 8710c48fb90373ebd2afe1afa7399a6643b52c37 Mon Sep 17 00:00:00 2001
From: Dave Pagan <dave.pagan at amd.com>
Date: Fri, 3 Nov 2023 15:15:27 -0500
Subject: [PATCH] [OpenMP][CodeGen] Improved codegen for combined loop
directives
IR for 'target teams loop' is now dependent on suitability of
associated loop-nest.
If a loop-nest:
- does not contain a function call, or
- the -fopenmp-assume-no-nested-parallelism has been specified,
- or the call is to an OpenMP API
AND
- does not contain nested loop bind(parallel) directives
then it can be emitted as 'target teams distribute parallel for', which
is the current default. Otherwise, it is emitted as
'target teams distribute'.
Added debug output indicating how 'target teams loop' was emitted. Flag
is -mllvm -debug-only=target-teams-loop-codegen
Added LIT tests explicitly verifying 'target teams loop' emitted as
a parallel loop and a distribute loop.
Updated other 'loop' related tests as needed to reflect change in IR.
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 9 +-
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 10 +-
clang/lib/CodeGen/CGStmtOpenMP.cpp | 65 +-
clang/lib/CodeGen/CodeGenModule.cpp | 93 +
clang/lib/CodeGen/CodeGenModule.h | 8 +
clang/lib/Sema/SemaOpenMP.cpp | 10 +-
...vptx_target_teams_generic_loop_codegen.cpp | 114 +-
...eams_generic_loop_generic_mode_codegen.cpp | 397 +-
.../target_teams_generic_loop_codegen-1.cpp | 50 +-
.../target_teams_generic_loop_codegen.cpp | 38 +-
...ams_generic_loop_codegen_as_distribute.cpp | 1685 +++++++
...s_generic_loop_codegen_as_parallel_for.cpp | 3998 +++++++++++++++++
...et_teams_generic_loop_collapse_codegen.cpp | 32 +-
.../target_teams_generic_loop_if_codegen.cpp | 718 +--
...get_teams_generic_loop_private_codegen.cpp | 1442 +-----
...s_generic_loop_uses_allocators_codegen.cpp | 6 +-
.../OpenMP/teams_generic_loop_codegen-1.cpp | 1356 +-----
.../OpenMP/teams_generic_loop_codegen.cpp | 666 +--
.../teams_generic_loop_collapse_codegen.cpp | 820 +---
.../teams_generic_loop_private_codegen.cpp | 681 +--
.../teams_generic_loop_reduction_codegen.cpp | 811 +---
21 files changed, 7098 insertions(+), 5911 deletions(-)
create mode 100644 clang/test/OpenMP/target_teams_generic_loop_codegen_as_distribute.cpp
create mode 100644 clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index d2be8141a3a4b31..97f963df7ad67b4 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2644,7 +2644,8 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
// Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc,
- isOpenMPDistributeDirective(DKind)
+ isOpenMPDistributeDirective(DKind) ||
+ (DKind == OMPD_target_teams_loop)
? OMP_IDENT_WORK_DISTRIBUTE
: isOpenMPLoopDirective(DKind)
? OMP_IDENT_WORK_LOOP
@@ -8779,7 +8780,8 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
switch (D.getDirectiveKind()) {
case OMPD_target:
- // For now, just treat 'target teams loop' as if it's distributed.
+ // For now, treat 'target' with nested 'teams loop' as if it's
+ // distributed (target teams distribute).
if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
return NestedDir;
if (DKind == OMPD_teams) {
@@ -9263,7 +9265,8 @@ llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
SizeEmitter) {
OpenMPDirectiveKind Kind = D.getDirectiveKind();
const OMPExecutableDirective *TD = &D;
- // Get nested teams distribute kind directive, if any.
+ // Get nested teams distribute kind directive, if any. For now, treat
+ // 'target_teams_loop' as if it's really a target_teams_distribute.
if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
Kind != OMPD_target_teams_loop)
TD = getNestedDistributeDirective(CGM.getContext(), D);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index abecf5250f4cf96..2e1cf1ed3abf40c 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -639,14 +639,14 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
return false;
}
-static bool supportsSPMDExecutionMode(ASTContext &Ctx,
+static bool supportsSPMDExecutionMode(CodeGenModule &CGM,
const OMPExecutableDirective &D) {
+ ASTContext &Ctx = CGM.getContext();
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
switch (DirectiveKind) {
case OMPD_target:
case OMPD_target_teams:
return hasNestedSPMDDirective(Ctx, D);
- case OMPD_target_teams_loop:
case OMPD_target_parallel_loop:
case OMPD_target_parallel:
case OMPD_target_parallel_for:
@@ -658,6 +658,10 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
return true;
case OMPD_target_teams_distribute:
return false;
+ case OMPD_target_teams_loop:
+ // Whether this is true or not depends on how the directive will
+ // eventually be emitted.
+ return CGM.teamsLoopCanBeParallelFor(D);
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:
@@ -870,7 +874,7 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
assert(!ParentName.empty() && "Invalid target region parent name!");
- bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
+ bool Mode = supportsSPMDExecutionMode(CGM, D);
bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
if (Mode || IsBareKernel)
emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 62b056c5d08a18c..16f84cab181b427 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -34,11 +34,14 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Debug.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
+#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
+
static const VarDecl *getBaseDecl(const Expr *Ref);
namespace {
@@ -1435,6 +1438,7 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
}
bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
isOpenMPParallelDirective(D.getDirectiveKind()) ||
+ CGM.teamsLoopCanBeParallelFor(D) ||
ReductionKind == OMPD_simd;
bool SimpleReduction = ReductionKind == OMPD_simd;
// Emit nowait reduction if nowait clause is present or directive is a
@@ -7876,11 +7880,9 @@ void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
const OMPTeamsGenericLoopDirective &S) {
// To be consistent with current behavior of 'target teams loop', emit
- // 'teams loop' as if its constituent constructs are 'distribute,
- // 'parallel, and 'for'.
+ // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
- S.getDistInc());
+ CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
};
// Emit teams region as a standalone region.
@@ -7894,15 +7896,14 @@ void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
CodeGenDistribute);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
- emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
+ emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
emitPostUpdateForReductionClause(*this, S,
[](CodeGenFunction &) { return nullptr; });
}
-static void
-emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF,
- const OMPTargetTeamsGenericLoopDirective &S,
- PrePostActionTy &Action) {
+static void emitTargetTeamsGenericLoopRegionAsParallel(
+ CodeGenFunction &CGF, PrePostActionTy &Action,
+ const OMPTargetTeamsGenericLoopDirective &S) {
Action.Enter(CGF);
// Emit 'teams loop' as if its constituent constructs are 'distribute,
// 'parallel, and 'for'.
@@ -7922,19 +7923,52 @@ emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF,
CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
-
+ DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
+ CGF.CGM.emitTargetTeamsLoopCodegenStatus(
+ TTL_CODEGEN_TYPE " as parallel for", S,
+ CGF.CGM.getLangOpts().OpenMPIsTargetDevice));
emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
CodeGenTeams);
emitPostUpdateForReductionClause(CGF, S,
[](CodeGenFunction &) { return nullptr; });
}
-/// Emit combined directive 'target teams loop' as if its constituent
-/// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'.
+static void emitTargetTeamsGenericLoopRegionAsDistribute(
+ CodeGenFunction &CGF, PrePostActionTy &Action,
+ const OMPTargetTeamsGenericLoopDirective &S) {
+ Action.Enter(CGF);
+ // Emit 'teams loop' as if its constituent construct is 'distribute'.
+ auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
+ };
+
+ // Emit teams region as a standalone region.
+ auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+ (void)PrivateScope.Privatize();
+ CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
+ CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
+ };
+ DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
+ CGF.CGM.emitTargetTeamsLoopCodegenStatus(
+ TTL_CODEGEN_TYPE " as distribute", S,
+ CGF.CGM.getLangOpts().OpenMPIsTargetDevice));
+ emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
+ emitPostUpdateForReductionClause(CGF, S,
+ [](CodeGenFunction &) { return nullptr; });
+}
+
void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
const OMPTargetTeamsGenericLoopDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
- emitTargetTeamsGenericLoopRegion(CGF, S, Action);
+ if (CGF.CGM.teamsLoopCanBeParallelFor(S))
+ emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
+ else
+ emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
};
emitCommonOMPTargetDirective(*this, S, CodeGen);
}
@@ -7944,7 +7978,10 @@ void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
const OMPTargetTeamsGenericLoopDirective &S) {
// Emit SPMD target parallel loop region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
- emitTargetTeamsGenericLoopRegion(CGF, S, Action);
+ if (CGF.CGM.teamsLoopCanBeParallelFor(S))
+ emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
+ else
+ emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
};
llvm::Function *Fn;
llvm::Constant *Addr;
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index f1b900be74b2cdf..0dd389b3a8f1338 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -7485,6 +7485,99 @@ void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
}
}
+namespace {
+/// A 'teams loop' with a nested 'loop bind(parallel)' or generic function
+/// call in the associated loop-nest cannot be a 'parllel for'.
+class TeamsLoopChecker final : public ConstStmtVisitor<TeamsLoopChecker> {
+public:
+ TeamsLoopChecker(CodeGenModule &CGM)
+ : CGM(CGM), TeamsLoopCanBeParallelFor{true} {}
+ bool teamsLoopCanBeParallelFor() const {
+ return TeamsLoopCanBeParallelFor;
+ }
+ // Is there a nested OpenMP loop bind(parallel)
+ void VisitOMPExecutableDirective(const OMPExecutableDirective *D) {
+ if (D->getDirectiveKind() == llvm::omp::Directive::OMPD_loop) {
+ if (const auto *C = D->getSingleClause<OMPBindClause>())
+ if (C->getBindKind() == OMPC_BIND_parallel) {
+ TeamsLoopCanBeParallelFor = false;
+ // No need to continue visiting any more
+ return;
+ }
+ }
+ for (const Stmt *Child : D->children())
+ if (Child)
+ Visit(Child);
+ }
+
+ void VisitCallExpr(const CallExpr *C) {
+ // Function calls inhibit parallel loop translation of 'target teams loop'
+ // unless the assume-no-nested-parallelism flag has been specified.
+ // OpenMP API runtime library calls do not inhibit parallel loop
+ // translation, regardless of the assume-no-nested-parallelism.
+ if (C) {
+ bool IsOpenMPAPI = false;
+ auto *FD = dyn_cast_or_null<FunctionDecl>(C->getCalleeDecl());
+ if (FD) {
+ std::string Name = FD->getNameInfo().getAsString();
+ IsOpenMPAPI = Name.find("omp_") == 0;
+ }
+ TeamsLoopCanBeParallelFor =
+ IsOpenMPAPI || CGM.getLangOpts().OpenMPNoNestedParallelism;
+ if (!TeamsLoopCanBeParallelFor)
+ return;
+ }
+ for (const Stmt *Child : C->children())
+ if (Child)
+ Visit(Child);
+ }
+
+ void VisitCapturedStmt(const CapturedStmt *S) {
+ if (!S)
+ return;
+ Visit(S->getCapturedDecl()->getBody());
+ }
+
+ void VisitStmt(const Stmt *S) {
+ if (!S)
+ return;
+ for (const Stmt *Child : S->children())
+ if (Child)
+ Visit(Child);
+ }
+
+private:
+ CodeGenModule &CGM;
+ bool TeamsLoopCanBeParallelFor;
+};
+} // namespace
+
+/// Determine if 'teams loop' can be emitted using 'parallel for'.
+bool CodeGenModule::teamsLoopCanBeParallelFor(const OMPExecutableDirective &D) {
+ if (D.getDirectiveKind() != llvm::omp::Directive::OMPD_target_teams_loop)
+ return false;
+ assert(D.hasAssociatedStmt() &&
+ "Loop directive must have associated statement.");
+ TeamsLoopChecker Checker(*this);
+ Checker.Visit(D.getAssociatedStmt());
+ return Checker.teamsLoopCanBeParallelFor();
+}
+
+void CodeGenModule::emitTargetTeamsLoopCodegenStatus(
+ std::string StatusMsg, const OMPExecutableDirective &D, bool IsDevice) {
+ if (IsDevice)
+ StatusMsg += ": DEVICE";
+ else
+ StatusMsg += ": HOST";
+ SourceLocation L = D.getBeginLoc();
+ SourceManager &SM = getContext().getSourceManager();
+ PresumedLoc PLoc = SM.getPresumedLoc(L);
+ const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr;
+ unsigned LineNo =
+ PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
+ llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
+}
+
void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) {
assert(DeferredDeclsToEmit.empty() &&
"Should have emitted all decls deferred to emit.");
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index e81edc979c208b1..3f1a64b47a64bde 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1528,6 +1528,8 @@ class CodeGenModule : public CodeGenTypeCache {
LValueBaseInfo *BaseInfo = nullptr,
TBAAAccessInfo *TBAAInfo = nullptr);
bool stopAutoInit();
+ /// Determine if 'teams loop' can be emitted using 'parallel for'.
+ bool teamsLoopCanBeParallelFor(const OMPExecutableDirective &D);
/// Print the postfix for externalized static variable or kernels for single
/// source offloading languages CUDA and HIP. The unique postfix is created
@@ -1537,6 +1539,12 @@ class CodeGenModule : public CodeGenTypeCache {
void printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
const Decl *D) const;
+ /// Under debug mode, print status of target teams loop transformation,
+ /// which should be either '#distribute' or '#parallel for'
+ void emitTargetTeamsLoopCodegenStatus(std::string StatusMsg,
+ const OMPExecutableDirective &D,
+ bool IsDevice);
+
/// Move some lazily-emitted states to the NewBuilder. This is especially
/// essential for the incremental parsing environment like Clang Interpreter,
/// because we'll lose all important information after each repl.
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index dcdd6e7a3f5c762..8cd2adff191e01a 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4478,6 +4478,8 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
Params);
break;
}
+ // For 'target teams loop', collect all captured regions so codegen can
+ // later decide the best IR to emit given the associated loop-nest.
case OMPD_target_teams_loop:
case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd: {
@@ -15573,6 +15575,12 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
if (NameModifier == OMPD_unknown || NameModifier == OMPD_parallel)
CaptureRegion = OMPD_target;
break;
+ case OMPD_teams_loop:
+ case OMPD_target_teams_loop:
+ // For [target] teams loop, assume capture region is 'teams' so it's
+ // available for codegen later to use if/when necessary.
+ CaptureRegion = OMPD_teams;
+ break;
case OMPD_target_teams_distribute_parallel_for_simd:
if (OpenMPVersion >= 50 &&
(NameModifier == OMPD_unknown || NameModifier == OMPD_simd)) {
@@ -15580,7 +15588,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
break;
}
[[fallthrough]];
- case OMPD_target_teams_loop:
case OMPD_target_teams_distribute_parallel_for:
// If this clause applies to the nested 'parallel' region, capture within
// the 'teams' region, otherwise do not capture.
@@ -15703,7 +15710,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_loop:
- case OMPD_teams_loop:
case OMPD_teams:
case OMPD_tile:
case OMPD_unroll:
diff --git a/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
index fc83500a09f9846..ef07e607d1dbbf2 100644
--- a/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
@@ -219,7 +219,7 @@ int bar(int n){
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP41]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
@@ -276,7 +276,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
@@ -313,30 +313,38 @@ int bar(int n){
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP18]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
+// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
+// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -344,11 +352,12 @@ int bar(int n){
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
@@ -360,6 +369,7 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
@@ -432,7 +442,7 @@ int bar(int n){
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK1-NEXT: ret void
//
//
@@ -502,7 +512,7 @@ int bar(int n){
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
// CHECK1-NEXT: ret void
//
//
@@ -637,7 +647,7 @@ int bar(int n){
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK1-NEXT: ret void
//
//
@@ -730,7 +740,7 @@ int bar(int n){
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
// CHECK1-NEXT: ret void
//
//
@@ -904,7 +914,7 @@ int bar(int n){
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP42]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP42]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
@@ -1040,7 +1050,7 @@ int bar(int n){
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP28]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
@@ -1208,7 +1218,7 @@ int bar(int n){
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP43]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP43]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
@@ -1305,7 +1315,7 @@ int bar(int n){
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
@@ -1465,7 +1475,7 @@ int bar(int n){
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP41]])
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
@@ -1522,7 +1532,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
@@ -1559,30 +1569,38 @@ int bar(int n){
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP18]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33
-// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
+// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
+// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1590,11 +1608,12 @@ int bar(int n){
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
@@ -1606,6 +1625,7 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
@@ -1678,7 +1698,7 @@ int bar(int n){
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]])
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK2-NEXT: ret void
//
//
@@ -1748,7 +1768,7 @@ int bar(int n){
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
// CHECK2-NEXT: ret void
//
//
@@ -1883,7 +1903,7 @@ int bar(int n){
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]])
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK2-NEXT: ret void
//
//
@@ -1976,7 +1996,7 @@ int bar(int n){
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
// CHECK2-NEXT: ret void
//
//
@@ -2149,7 +2169,7 @@ int bar(int n){
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP44]])
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP44]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
@@ -2281,7 +2301,7 @@ int bar(int n){
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]])
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP28]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
@@ -2449,7 +2469,7 @@ int bar(int n){
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP43]])
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP43]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
@@ -2546,7 +2566,7 @@ int bar(int n){
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
@@ -2704,7 +2724,7 @@ int bar(int n){
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP39]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP39]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
@@ -2759,7 +2779,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
@@ -2794,30 +2814,38 @@ int bar(int n){
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP18]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33
-// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
+// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
+// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -2825,11 +2853,12 @@ int bar(int n){
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
@@ -2841,6 +2870,7 @@ int bar(int n){
// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
@@ -2911,7 +2941,7 @@ int bar(int n){
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK3-NEXT: ret void
//
//
@@ -2977,7 +3007,7 @@ int bar(int n){
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
// CHECK3-NEXT: ret void
//
//
@@ -3110,7 +3140,7 @@ int bar(int n){
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK3-NEXT: ret void
//
//
@@ -3198,7 +3228,7 @@ int bar(int n){
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
// CHECK3-NEXT: ret void
//
//
@@ -3374,7 +3404,7 @@ int bar(int n){
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP44]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP44]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
@@ -3511,7 +3541,7 @@ int bar(int n){
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP28]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
@@ -3677,7 +3707,7 @@ int bar(int n){
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP41]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
@@ -3769,7 +3799,7 @@ int bar(int n){
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
index ef26c9b1003ac55..c48bc30fa971214 100644
--- a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
@@ -30,17 +30,20 @@ int main(int argc, char **argv) {
#endif
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
@@ -50,9 +53,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
+// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -60,56 +68,55 @@ int main(int argc, char **argv) {
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
+// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
@@ -122,177 +129,54 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
-// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARGC_CASTED]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr
-// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr
-// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
-// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr
-// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]]
-// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
-// CHECK1: cond.true10:
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: br label [[COND_END12:%.*]]
-// CHECK1: cond.false11:
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END12]]
-// CHECK1: cond.end12:
-// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ]
-// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP41]])
-// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK1: omp.precond.end:
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
-// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
-// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR5:[0-9]+]]
-// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]]
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR4:[0-9]+]]
+// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR4]]
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]]
-// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]]
+// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR4]]
// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]]
// CHECK1-NEXT: store i32 [[ADD10]], ptr [[TMP0]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], 1
// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP16]])
+// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24
-// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
@@ -302,9 +186,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
+// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -312,56 +201,55 @@ int main(int argc, char **argv) {
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4
+// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
@@ -374,155 +262,34 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
-// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP16]], ptr [[ARGC_CASTED]], align 4
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr
-// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4
-// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr
-// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
-// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr
-// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4
-// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
-// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]]
-// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
-// CHECK2: cond.true10:
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: br label [[COND_END12:%.*]]
-// CHECK2: cond.false11:
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: br label [[COND_END12]]
-// CHECK2: cond.end12:
-// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ]
-// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK2: omp.inner.for.end:
-// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP39]])
-// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK2: omp.precond.end:
-// CHECK2-NEXT: ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] {
-// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
-// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
-// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]]
-// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
+// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
-// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I3]]) #[[ATTR5:[0-9]+]]
-// CHECK2-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]]
-// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CALL]], [[CALL5]]
-// CHECK2-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]]
-// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD6]], [[CALL7]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[TMP0]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
+// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR4:[0-9]+]]
+// CHECK2-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR4]]
+// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]]
+// CHECK2-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR4]]
+// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]]
+// CHECK2-NEXT: store i32 [[ADD10]], ptr [[TMP0]], align 4
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], 1
+// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP16]])
+// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp
index ad84510e7f8ab88..adb4bafd4435fb6 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp
@@ -223,7 +223,7 @@ int target_teams_fun(int *g){
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
@@ -280,7 +280,7 @@ int target_teams_fun(int *g){
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
@@ -324,7 +324,7 @@ int target_teams_fun(int *g){
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
@@ -437,7 +437,7 @@ int target_teams_fun(int *g){
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
@@ -543,7 +543,7 @@ int target_teams_fun(int *g){
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
@@ -848,7 +848,7 @@ int target_teams_fun(int *g){
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]])
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
@@ -905,7 +905,7 @@ int target_teams_fun(int *g){
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
@@ -949,7 +949,7 @@ int target_teams_fun(int *g){
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
@@ -1062,7 +1062,7 @@ int target_teams_fun(int *g){
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]])
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
@@ -1168,7 +1168,7 @@ int target_teams_fun(int *g){
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
@@ -1478,7 +1478,7 @@ int target_teams_fun(int *g){
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]])
+// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
// CHECK4-NEXT: br label [[OMP_PRECOND_END]]
// CHECK4: omp.precond.end:
// CHECK4-NEXT: ret void
@@ -1533,7 +1533,7 @@ int target_teams_fun(int *g){
// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
@@ -1576,7 +1576,7 @@ int target_teams_fun(int *g){
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
// CHECK4-NEXT: br label [[OMP_PRECOND_END]]
// CHECK4: omp.precond.end:
// CHECK4-NEXT: ret void
@@ -1687,7 +1687,7 @@ int target_teams_fun(int *g){
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
// CHECK4-NEXT: br label [[OMP_PRECOND_END]]
// CHECK4: omp.precond.end:
// CHECK4-NEXT: ret void
@@ -1790,7 +1790,7 @@ int target_teams_fun(int *g){
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
// CHECK4-NEXT: br label [[OMP_PRECOND_END]]
// CHECK4: omp.precond.end:
// CHECK4-NEXT: ret void
@@ -1914,7 +1914,7 @@ int target_teams_fun(int *g){
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]])
+// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]])
// CHECK10-NEXT: br label [[OMP_PRECOND_END]]
// CHECK10: omp.precond.end:
// CHECK10-NEXT: ret void
@@ -1971,7 +1971,7 @@ int target_teams_fun(int *g){
// CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
@@ -2015,7 +2015,7 @@ int target_teams_fun(int *g){
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
// CHECK10-NEXT: br label [[OMP_PRECOND_END]]
// CHECK10: omp.precond.end:
// CHECK10-NEXT: ret void
@@ -2130,7 +2130,7 @@ int target_teams_fun(int *g){
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]])
+// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]])
// CHECK10-NEXT: br label [[OMP_PRECOND_END]]
// CHECK10: omp.precond.end:
// CHECK10-NEXT: ret void
@@ -2236,7 +2236,7 @@ int target_teams_fun(int *g){
// CHECK10: omp.loop.exit:
// CHECK10-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
// CHECK10-NEXT: br label [[OMP_PRECOND_END]]
// CHECK10: omp.precond.end:
// CHECK10-NEXT: ret void
@@ -2351,7 +2351,7 @@ int target_teams_fun(int *g){
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK12-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]])
+// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
// CHECK12-NEXT: br label [[OMP_PRECOND_END]]
// CHECK12: omp.precond.end:
// CHECK12-NEXT: ret void
@@ -2406,7 +2406,7 @@ int target_teams_fun(int *g){
// CHECK12-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
@@ -2449,7 +2449,7 @@ int target_teams_fun(int *g){
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK12-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
// CHECK12-NEXT: br label [[OMP_PRECOND_END]]
// CHECK12: omp.precond.end:
// CHECK12-NEXT: ret void
@@ -2562,7 +2562,7 @@ int target_teams_fun(int *g){
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
// CHECK12-NEXT: br label [[OMP_PRECOND_END]]
// CHECK12: omp.precond.end:
// CHECK12-NEXT: ret void
@@ -2665,7 +2665,7 @@ int target_teams_fun(int *g){
// CHECK12: omp.loop.exit:
// CHECK12-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]])
// CHECK12-NEXT: br label [[OMP_PRECOND_END]]
// CHECK12: omp.precond.end:
// CHECK12-NEXT: ret void
diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp
index 3b1af7618794db6..43db4255a337f19 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp
@@ -1312,7 +1312,7 @@ int foo() {
// IR-GPU: omp.loop.exit:
// IR-GPU-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// IR-GPU-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
-// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP33]])
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP33]])
// IR-GPU-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// IR-GPU-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// IR-GPU-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -1418,7 +1418,7 @@ int foo() {
// IR-GPU: omp.arrayinit.done:
// IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
@@ -1466,7 +1466,7 @@ int foo() {
// IR-GPU: omp.loop.exit:
// IR-GPU-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
-// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP19]])
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP19]])
// IR-GPU-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
// IR-GPU-NEXT: store ptr [[SUM4_ASCAST]], ptr [[TMP20]], align 8
// IR-GPU-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i64 400, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
@@ -2001,7 +2001,7 @@ int foo() {
// IR: omp.loop.exit:
// IR-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]])
+// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]])
// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
// IR-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
// IR-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -2015,7 +2015,7 @@ int foo() {
// IR-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8
// IR-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// IR-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
@@ -2036,7 +2036,7 @@ int foo() {
// IR-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]]
// IR: omp.arraycpy.done10:
-// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
+// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR: .omp.reduction.case2:
// IR-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100
@@ -2052,7 +2052,6 @@ int foo() {
// IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]]
// IR: omp.arraycpy.done18:
-// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR: .omp.reduction.default:
// IR-NEXT: ret void
@@ -2109,7 +2108,7 @@ int foo() {
// IR: omp.arrayinit.done:
// IR-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99
// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -2166,12 +2165,12 @@ int foo() {
// IR: omp.loop.exit:
// IR-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
// IR-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// IR-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8
// IR-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// IR-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
@@ -2192,7 +2191,7 @@ int foo() {
// IR-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]]
// IR: omp.arraycpy.done19:
-// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
+// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR: .omp.reduction.case2:
// IR-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100
@@ -2208,7 +2207,6 @@ int foo() {
// IR-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]]
// IR: omp.arraycpy.done27:
-// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR: .omp.reduction.default:
// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
@@ -2398,7 +2396,7 @@ int foo() {
// IR-PCH: omp.loop.exit:
// IR-PCH-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
-// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]])
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]])
// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
// IR-PCH-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
// IR-PCH-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
@@ -2412,7 +2410,7 @@ int foo() {
// IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8
// IR-PCH-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// IR-PCH-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
@@ -2433,7 +2431,7 @@ int foo() {
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]]
// IR-PCH: omp.arraycpy.done10:
-// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
+// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR-PCH: .omp.reduction.case2:
// IR-PCH-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100
@@ -2449,7 +2447,6 @@ int foo() {
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]]
// IR-PCH: omp.arraycpy.done18:
-// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR-PCH: .omp.reduction.default:
// IR-PCH-NEXT: ret void
@@ -2506,7 +2503,7 @@ int foo() {
// IR-PCH: omp.arrayinit.done:
// IR-PCH-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99
// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -2563,12 +2560,12 @@ int foo() {
// IR-PCH: omp.loop.exit:
// IR-PCH-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]])
// IR-PCH-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// IR-PCH-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8
// IR-PCH-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// IR-PCH-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
@@ -2589,7 +2586,7 @@ int foo() {
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]]
// IR-PCH: omp.arraycpy.done19:
-// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
+// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR-PCH: .omp.reduction.case2:
// IR-PCH-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100
@@ -2605,7 +2602,6 @@ int foo() {
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]]
// IR-PCH: omp.arraycpy.done27:
-// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR-PCH: .omp.reduction.default:
// IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen_as_distribute.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen_as_distribute.cpp
new file mode 100644
index 000000000000000..8775b14e0332fd3
--- /dev/null
+++ b/clang/test/OpenMP/target_teams_generic_loop_codegen_as_distribute.cpp
@@ -0,0 +1,1685 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 2
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU
+
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
+extern int foo(int i);
+
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+int N = 100000;
+int main()
+{
+ int i;
+ int a[N];
+ int b[N];
+
+ // Presence of #loop bind(parallel). Cannot use 'parallel for', must
+ // use 'distribute'
+ #pragma omp target teams loop
+ for (i=0; i < N; i++) {
+ #pragma omp loop bind(parallel)
+ for (int j=0; j < N; j++) {
+ a[i] = b[i] * N + j;
+ }
+ }
+
+ // Presence of call. Cannot use 'parallel for', must use 'distribute' when
+ // assume-no-neseted-parallelism isn't specified.
+ #pragma omp target teams loop
+ for (i=0; i < N; i++) {
+ for (int j=0; j < N; j++) {
+ a[i] = b[i] * N + foo(j);
+ }
+ }
+ return 0;
+}
+#endif
+// IR-GPU-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27
+// IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment to ptr), ptr [[DYN_PTR]])
+// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
+// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// IR-GPU: user_code.entry:
+// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]]
+// IR-GPU-NEXT: call void @__kmpc_target_deinit()
+// IR-GPU-NEXT: ret void
+// IR-GPU: worker.exit:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_omp_outlined
+// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1:[0-9]+]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I5:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NEXT: [[I5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I5]] to ptr
+// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU: omp.precond.then:
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
+// IR-GPU-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-GPU-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-GPU: cond.true:
+// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END:%.*]]
+// IR-GPU: cond.false:
+// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END]]
+// IR-GPU: cond.end:
+// IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU: omp.inner.for.cond:
+// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1
+// IR-GPU-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]]
+// IR-GPU-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU: omp.inner.for.body:
+// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
+// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP21]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// IR-GPU-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
+// IR-GPU-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// IR-GPU-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
+// IR-GPU-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// IR-GPU-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP22]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8
+// IR-GPU-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
+// IR-GPU-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP0]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8
+// IR-GPU-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
+// IR-GPU-NEXT: store ptr [[TMP1]], ptr [[TMP31]], align 8
+// IR-GPU-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5
+// IR-GPU-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP33]], ptr [[TMP32]], align 8
+// IR-GPU-NEXT: [[TMP34:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6
+// IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP34]], align 8
+// IR-GPU-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
+// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP36]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 7)
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU: omp.inner.for.inc:
+// IR-GPU-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
+// IR-GPU-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]]
+// IR-GPU-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP41]], [[TMP42]]
+// IR-GPU-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]]
+// IR-GPU-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
+// IR-GPU: cond.true12:
+// IR-GPU-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END14:%.*]]
+// IR-GPU: cond.false13:
+// IR-GPU-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END14]]
+// IR-GPU: cond.end14:
+// IR-GPU-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE12]] ], [ [[TMP46]], [[COND_FALSE13]] ]
+// IR-GPU-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU: omp.inner.for.end:
+// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU: omp.loop.exit:
+// IR-GPU-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP49]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.end:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_omp_outlined_omp_outlined
+// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I6:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IV9:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[_TMP10:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_LB18:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_UB19:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_STRIDE20:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST21:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J22:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NEXT: [[I6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I6]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IV9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV9]] to ptr
+// IR-GPU-NEXT: [[TMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP10]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_11]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_12]] to ptr
+// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_LB18_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB18]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_UB19_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB19]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_STRIDE20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE20]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST21]] to ptr
+// IR-GPU-NEXT: [[J22_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J22]] to ptr
+// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END41:%.*]]
+// IR-GPU: omp.precond.then:
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
+// IR-GPU-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU: omp.inner.for.cond:
+// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64
+// IR-GPU-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP14]]
+// IR-GPU-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
+// IR-GPU: omp.inner.for.body:
+// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
+// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-GPU-NEXT: store i32 [[ADD]], ptr [[I6_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_11_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11_ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP17]], 0
+// IR-GPU-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1
+// IR-GPU-NEXT: [[SUB15:%.*]] = sub nsw i32 [[DIV14]], 1
+// IR-GPU-NEXT: store i32 [[SUB15]], ptr [[DOTCAPTURE_EXPR_12_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP16:%.*]] = icmp slt i32 0, [[TMP18]]
+// IR-GPU-NEXT: br i1 [[CMP16]], label [[OMP_PRECOND_THEN17:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU: omp.precond.then17:
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB18_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_UB19_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE20_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST21_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP21]], i32 33, ptr [[DOTOMP_IS_LAST21_ASCAST]], ptr [[DOTOMP_LB18_ASCAST]], ptr [[DOTOMP_UB19_ASCAST]], ptr [[DOTOMP_STRIDE20_ASCAST]], i32 1, i32 1)
+// IR-GPU-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
+// IR-GPU: omp.dispatch.cond:
+// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB19_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP23]]
+// IR-GPU-NEXT: br i1 [[CMP23]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-GPU: cond.true:
+// IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END:%.*]]
+// IR-GPU: cond.false:
+// IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB19_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END]]
+// IR-GPU: cond.end:
+// IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ [[TMP24]], [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ]
+// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB19_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB18_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV9_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV9_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB19_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP24:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]]
+// IR-GPU-NEXT: br i1 [[CMP24]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
+// IR-GPU: omp.dispatch.body:
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND25:%.*]]
+// IR-GPU: omp.inner.for.cond25:
+// IR-GPU-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV9_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB19_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP26:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]]
+// IR-GPU-NEXT: br i1 [[CMP26]], label [[OMP_INNER_FOR_BODY27:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU: omp.inner.for.body27:
+// IR-GPU-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV9_ASCAST]], align 4
+// IR-GPU-NEXT: [[MUL28:%.*]] = mul nsw i32 [[TMP31]], 1
+// IR-GPU-NEXT: [[ADD29:%.*]] = add nsw i32 0, [[MUL28]]
+// IR-GPU-NEXT: store i32 [[ADD29]], ptr [[J22_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP32:%.*]] = load i32, ptr [[I6_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP32]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-GPU-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-GPU-NEXT: [[TMP34:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: [[MUL30:%.*]] = mul nsw i32 [[TMP33]], [[TMP34]]
+// IR-GPU-NEXT: [[TMP35:%.*]] = load i32, ptr [[J22_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD31:%.*]] = add nsw i32 [[MUL30]], [[TMP35]]
+// IR-GPU-NEXT: [[TMP36:%.*]] = load i32, ptr [[I6_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM32:%.*]] = sext i32 [[TMP36]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM32]]
+// IR-GPU-NEXT: store i32 [[ADD31]], ptr [[ARRAYIDX33]], align 4
+// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-GPU: omp.body.continue:
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU: omp.inner.for.inc:
+// IR-GPU-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV9_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD34:%.*]] = add nsw i32 [[TMP37]], 1
+// IR-GPU-NEXT: store i32 [[ADD34]], ptr [[DOTOMP_IV9_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND25]]
+// IR-GPU: omp.inner.for.end:
+// IR-GPU-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
+// IR-GPU: omp.dispatch.inc:
+// IR-GPU-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB18_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE20_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD35:%.*]] = add nsw i32 [[TMP38]], [[TMP39]]
+// IR-GPU-NEXT: store i32 [[ADD35]], ptr [[DOTOMP_LB18_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_UB19_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE20_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP40]], [[TMP41]]
+// IR-GPU-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_UB19_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_DISPATCH_COND]]
+// IR-GPU: omp.dispatch.end:
+// IR-GPU-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP43]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.end:
+// IR-GPU-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4
+// IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4:[0-9]+]] to ptr), i32 [[TMP45]])
+// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]]
+// IR-GPU: omp.body.continue37:
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]]
+// IR-GPU: omp.inner.for.inc38:
+// IR-GPU-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD39:%.*]] = add nsw i32 [[TMP46]], [[TMP47]]
+// IR-GPU-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU: omp.inner.for.end40:
+// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU: omp.loop.exit:
+// IR-GPU-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP49]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END41]]
+// IR-GPU: omp.precond.end41:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37
+// IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_kernel_environment to ptr), ptr [[DYN_PTR]])
+// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
+// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// IR-GPU: user_code.entry:
+// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2]]
+// IR-GPU-NEXT: call void @__kmpc_target_deinit()
+// IR-GPU-NEXT: ret void
+// IR-GPU: worker.exit:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_omp_outlined
+// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I5:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NEXT: [[I5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I5]] to ptr
+// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU: omp.precond.then:
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-GPU-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-GPU: cond.true:
+// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END:%.*]]
+// IR-GPU: cond.false:
+// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END]]
+// IR-GPU: cond.end:
+// IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU: omp.inner.for.cond:
+// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// IR-GPU-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU: omp.inner.for.body:
+// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1
+// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-GPU-NEXT: store i32 [[ADD]], ptr [[I5_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[FOR_COND:%.*]]
+// IR-GPU: for.cond:
+// IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP18]], [[TMP19]]
+// IR-GPU-NEXT: br i1 [[CMP8]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// IR-GPU: for.body:
+// IR-GPU-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP21]], [[TMP22]]
+// IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP23]]) #[[ATTR6:[0-9]+]]
+// IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 [[MUL9]], [[CALL]]
+// IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM11]]
+// IR-GPU-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4
+// IR-GPU-NEXT: br label [[FOR_INC:%.*]]
+// IR-GPU: for.inc:
+// IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1
+// IR-GPU-NEXT: store i32 [[INC]], ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
+// IR-GPU: for.end:
+// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-GPU: omp.body.continue:
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU: omp.inner.for.inc:
+// IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], 1
+// IR-GPU-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU: omp.inner.for.end:
+// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU: omp.loop.exit:
+// IR-GPU-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP28]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.end:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-LABEL: define dso_local noundef i32 @main
+// IR-SAME: () #[[ATTR0:[0-9]+]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// IR-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// IR-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: [[N_CASTED2:%.*]] = alloca i64, align 8
+// IR-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4
+// IR-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+// IR-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
+// IR-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
+// IR-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16
+// IR-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4
+// IR-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+// IR-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16
+// IR-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4
+// IR-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]]
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr @N, align 4
+// IR-NEXT: store i32 [[TMP7]], ptr [[N_CASTED2]], align 4
+// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[N_CASTED2]], align 8
+// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37(i64 [[TMP8]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]]
+// IR-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-NEXT: [[TMP9:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// IR-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP9]])
+// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[RETVAL]], align 4
+// IR-NEXT: ret i32 [[TMP10]]
+//
+//
+// IR-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27
+// IR-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-NEXT: ret void
+//
+//
+// IR-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined
+// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-NEXT: [[I5:%.*]] = alloca i32, align 4
+// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 0, ptr [[I]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR: omp.precond.then:
+// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR: cond.true:
+// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: br label [[COND_END:%.*]]
+// IR: cond.false:
+// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: br label [[COND_END]]
+// IR: cond.end:
+// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR: omp.inner.for.cond:
+// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// IR-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR: omp.inner.for.body:
+// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
+// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
+// IR-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR: omp.inner.for.inc:
+// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
+// IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR: omp.inner.for.end:
+// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR: omp.loop.exit:
+// IR-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
+// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
+// IR-NEXT: br label [[OMP_PRECOND_END]]
+// IR: omp.precond.end:
+// IR-NEXT: ret void
+//
+//
+// IR-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined.omp_outlined
+// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-NEXT: [[I6:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_IV9:%.*]] = alloca i32, align 4
+// IR-NEXT: [[_TMP10:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_LB18:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_UB19:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_STRIDE20:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_IS_LAST21:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J22:%.*]] = alloca i32, align 4
+// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 0, ptr [[I]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END43:%.*]]
+// IR: omp.precond.then:
+// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
+// IR-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
+// IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
+// IR-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
+// IR-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR: cond.true:
+// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: br label [[COND_END:%.*]]
+// IR: cond.false:
+// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: br label [[COND_END]]
+// IR: cond.end:
+// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
+// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// IR-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR: omp.inner.for.cond:
+// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
+// IR-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END41:%.*]]
+// IR: omp.inner.for.body:
+// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1
+// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-NEXT: store i32 [[ADD]], ptr [[I6]], align 4
+// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_11]], align 4
+// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4
+// IR-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP21]], 0
+// IR-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1
+// IR-NEXT: [[SUB15:%.*]] = sub nsw i32 [[DIV14]], 1
+// IR-NEXT: store i32 [[SUB15]], ptr [[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT: store i32 0, ptr [[J]], align 4
+// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4
+// IR-NEXT: [[CMP16:%.*]] = icmp slt i32 0, [[TMP22]]
+// IR-NEXT: br i1 [[CMP16]], label [[OMP_PRECOND_THEN17:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR: omp.precond.then17:
+// IR-NEXT: store i32 0, ptr [[DOTOMP_LB18]], align 4
+// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_UB19]], align 4
+// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE20]], align 4
+// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST21]], align 4
+// IR-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
+// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST21]], ptr [[DOTOMP_LB18]], ptr [[DOTOMP_UB19]], ptr [[DOTOMP_STRIDE20]], i32 1, i32 1)
+// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB19]], align 4
+// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT: [[CMP23:%.*]] = icmp sgt i32 [[TMP26]], [[TMP27]]
+// IR-NEXT: br i1 [[CMP23]], label [[COND_TRUE24:%.*]], label [[COND_FALSE25:%.*]]
+// IR: cond.true24:
+// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT: br label [[COND_END26:%.*]]
+// IR: cond.false25:
+// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB19]], align 4
+// IR-NEXT: br label [[COND_END26]]
+// IR: cond.end26:
+// IR-NEXT: [[COND27:%.*]] = phi i32 [ [[TMP28]], [[COND_TRUE24]] ], [ [[TMP29]], [[COND_FALSE25]] ]
+// IR-NEXT: store i32 [[COND27]], ptr [[DOTOMP_UB19]], align 4
+// IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_LB18]], align 4
+// IR-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_IV9]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND28:%.*]]
+// IR: omp.inner.for.cond28:
+// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV9]], align 4
+// IR-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB19]], align 4
+// IR-NEXT: [[CMP29:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]]
+// IR-NEXT: br i1 [[CMP29]], label [[OMP_INNER_FOR_BODY30:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR: omp.inner.for.body30:
+// IR-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV9]], align 4
+// IR-NEXT: [[MUL31:%.*]] = mul nsw i32 [[TMP33]], 1
+// IR-NEXT: [[ADD32:%.*]] = add nsw i32 0, [[MUL31]]
+// IR-NEXT: store i32 [[ADD32]], ptr [[J22]], align 4
+// IR-NEXT: [[TMP34:%.*]] = load i32, ptr [[I6]], align 4
+// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64
+// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-NEXT: [[TMP36:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: [[MUL33:%.*]] = mul nsw i32 [[TMP35]], [[TMP36]]
+// IR-NEXT: [[TMP37:%.*]] = load i32, ptr [[J22]], align 4
+// IR-NEXT: [[ADD34:%.*]] = add nsw i32 [[MUL33]], [[TMP37]]
+// IR-NEXT: [[TMP38:%.*]] = load i32, ptr [[I6]], align 4
+// IR-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP38]] to i64
+// IR-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM35]]
+// IR-NEXT: store i32 [[ADD34]], ptr [[ARRAYIDX36]], align 4
+// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR: omp.body.continue:
+// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR: omp.inner.for.inc:
+// IR-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IV9]], align 4
+// IR-NEXT: [[ADD37:%.*]] = add nsw i32 [[TMP39]], 1
+// IR-NEXT: store i32 [[ADD37]], ptr [[DOTOMP_IV9]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND28]]
+// IR: omp.inner.for.end:
+// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR: omp.loop.exit:
+// IR-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
+// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
+// IR-NEXT: br label [[OMP_PRECOND_END]]
+// IR: omp.precond.end:
+// IR-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
+// IR-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP43]])
+// IR-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]]
+// IR: omp.body.continue38:
+// IR-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]]
+// IR: omp.inner.for.inc39:
+// IR-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[ADD40:%.*]] = add nsw i32 [[TMP44]], 1
+// IR-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR: omp.inner.for.end41:
+// IR-NEXT: br label [[OMP_LOOP_EXIT42:%.*]]
+// IR: omp.loop.exit42:
+// IR-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4
+// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP46]])
+// IR-NEXT: br label [[OMP_PRECOND_END43]]
+// IR: omp.precond.end43:
+// IR-NEXT: ret void
+//
+//
+// IR-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37
+// IR-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-NEXT: ret void
+//
+//
+// IR-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined
+// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-NEXT: [[I5:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 0, ptr [[I]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR: omp.precond.then:
+// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR: cond.true:
+// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: br label [[COND_END:%.*]]
+// IR: cond.false:
+// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: br label [[COND_END]]
+// IR: cond.end:
+// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR: omp.inner.for.cond:
+// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// IR-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR: omp.inner.for.body:
+// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1
+// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-NEXT: store i32 [[ADD]], ptr [[I5]], align 4
+// IR-NEXT: store i32 0, ptr [[J]], align 4
+// IR-NEXT: br label [[FOR_COND:%.*]]
+// IR: for.cond:
+// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4
+// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP18]], [[TMP19]]
+// IR-NEXT: br i1 [[CMP8]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// IR: for.body:
+// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4
+// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64
+// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP21]], [[TMP22]]
+// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[J]], align 4
+// IR-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP23]])
+// IR-NEXT: [[ADD10:%.*]] = add nsw i32 [[MUL9]], [[CALL]]
+// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4
+// IR-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64
+// IR-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM11]]
+// IR-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4
+// IR-NEXT: br label [[FOR_INC:%.*]]
+// IR: for.inc:
+// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[J]], align 4
+// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1
+// IR-NEXT: store i32 [[INC]], ptr [[J]], align 4
+// IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// IR: for.end:
+// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR: omp.body.continue:
+// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR: omp.inner.for.inc:
+// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], 1
+// IR-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR: omp.inner.for.end:
+// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR: omp.loop.exit:
+// IR-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
+// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]])
+// IR-NEXT: br label [[OMP_PRECOND_END]]
+// IR: omp.precond.end:
+// IR-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define dso_local noundef i32 @main
+// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[N_CASTED2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+// IR-PCH-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
+// IR-PCH-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
+// IR-PCH-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16
+// IR-PCH-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+// IR-PCH-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16
+// IR-PCH-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]]
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[N_CASTED2]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[N_CASTED2]], align 8
+// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37(i64 [[TMP8]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]]
+// IR-PCH-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-PCH-NEXT: [[TMP9:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// IR-PCH-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP9]])
+// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[RETVAL]], align 4
+// IR-PCH-NEXT: ret i32 [[TMP10]]
+//
+//
+// IR-PCH-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27
+// IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I5:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-PCH-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// IR-PCH-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
+// IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I6:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_IV9:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[_TMP10:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_LB18:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_UB19:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_STRIDE20:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST21:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J22:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END43:%.*]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
+// IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
+// IR-PCH-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
+// IR-PCH-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END41:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I6]], align 4
+// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_11]], align 4
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4
+// IR-PCH-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP21]], 0
+// IR-PCH-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1
+// IR-PCH-NEXT: [[SUB15:%.*]] = sub nsw i32 [[DIV14]], 1
+// IR-PCH-NEXT: store i32 [[SUB15]], ptr [[DOTCAPTURE_EXPR_12]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4
+// IR-PCH-NEXT: [[CMP16:%.*]] = icmp slt i32 0, [[TMP22]]
+// IR-PCH-NEXT: br i1 [[CMP16]], label [[OMP_PRECOND_THEN17:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: omp.precond.then17:
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB18]], align 4
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4
+// IR-PCH-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_UB19]], align 4
+// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE20]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST21]], align 4
+// IR-PCH-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST21]], ptr [[DOTOMP_LB18]], ptr [[DOTOMP_UB19]], ptr [[DOTOMP_STRIDE20]], i32 1, i32 1)
+// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB19]], align 4
+// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4
+// IR-PCH-NEXT: [[CMP23:%.*]] = icmp sgt i32 [[TMP26]], [[TMP27]]
+// IR-PCH-NEXT: br i1 [[CMP23]], label [[COND_TRUE24:%.*]], label [[COND_FALSE25:%.*]]
+// IR-PCH: cond.true24:
+// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4
+// IR-PCH-NEXT: br label [[COND_END26:%.*]]
+// IR-PCH: cond.false25:
+// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB19]], align 4
+// IR-PCH-NEXT: br label [[COND_END26]]
+// IR-PCH: cond.end26:
+// IR-PCH-NEXT: [[COND27:%.*]] = phi i32 [ [[TMP28]], [[COND_TRUE24]] ], [ [[TMP29]], [[COND_FALSE25]] ]
+// IR-PCH-NEXT: store i32 [[COND27]], ptr [[DOTOMP_UB19]], align 4
+// IR-PCH-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_LB18]], align 4
+// IR-PCH-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_IV9]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND28:%.*]]
+// IR-PCH: omp.inner.for.cond28:
+// IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV9]], align 4
+// IR-PCH-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB19]], align 4
+// IR-PCH-NEXT: [[CMP29:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]]
+// IR-PCH-NEXT: br i1 [[CMP29]], label [[OMP_INNER_FOR_BODY30:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body30:
+// IR-PCH-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV9]], align 4
+// IR-PCH-NEXT: [[MUL31:%.*]] = mul nsw i32 [[TMP33]], 1
+// IR-PCH-NEXT: [[ADD32:%.*]] = add nsw i32 0, [[MUL31]]
+// IR-PCH-NEXT: store i32 [[ADD32]], ptr [[J22]], align 4
+// IR-PCH-NEXT: [[TMP34:%.*]] = load i32, ptr [[I6]], align 4
+// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-PCH-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-PCH-NEXT: [[TMP36:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: [[MUL33:%.*]] = mul nsw i32 [[TMP35]], [[TMP36]]
+// IR-PCH-NEXT: [[TMP37:%.*]] = load i32, ptr [[J22]], align 4
+// IR-PCH-NEXT: [[ADD34:%.*]] = add nsw i32 [[MUL33]], [[TMP37]]
+// IR-PCH-NEXT: [[TMP38:%.*]] = load i32, ptr [[I6]], align 4
+// IR-PCH-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP38]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM35]]
+// IR-PCH-NEXT: store i32 [[ADD34]], ptr [[ARRAYIDX36]], align 4
+// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-PCH: omp.body.continue:
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IV9]], align 4
+// IR-PCH-NEXT: [[ADD37:%.*]] = add nsw i32 [[TMP39]], 1
+// IR-PCH-NEXT: store i32 [[ADD37]], ptr [[DOTOMP_IV9]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND28]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
+// IR-PCH-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP43]])
+// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]]
+// IR-PCH: omp.body.continue38:
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]]
+// IR-PCH: omp.inner.for.inc39:
+// IR-PCH-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[ADD40:%.*]] = add nsw i32 [[TMP44]], 1
+// IR-PCH-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end41:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT42:%.*]]
+// IR-PCH: omp.loop.exit42:
+// IR-PCH-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP46]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END43]]
+// IR-PCH: omp.precond.end43:
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37
+// IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I5:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-PCH-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// IR-PCH-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I5]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NEXT: br label [[FOR_COND:%.*]]
+// IR-PCH: for.cond:
+// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP18]], [[TMP19]]
+// IR-PCH-NEXT: br i1 [[CMP8]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// IR-PCH: for.body:
+// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4
+// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP21]], [[TMP22]]
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[J]], align 4
+// IR-PCH-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP23]])
+// IR-PCH-NEXT: [[ADD10:%.*]] = add nsw i32 [[MUL9]], [[CALL]]
+// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4
+// IR-PCH-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM11]]
+// IR-PCH-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4
+// IR-PCH-NEXT: br label [[FOR_INC:%.*]]
+// IR-PCH: for.inc:
+// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[J]], align 4
+// IR-PCH-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1
+// IR-PCH-NEXT: store i32 [[INC]], ptr [[J]], align 4
+// IR-PCH-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// IR-PCH: for.end:
+// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-PCH: omp.body.continue:
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], 1
+// IR-PCH-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: ret void
+//
diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp
new file mode 100644
index 000000000000000..1aa57033eabcb54
--- /dev/null
+++ b/clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp
@@ -0,0 +1,3998 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU
+
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
+
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-assume-no-nested-parallelism -DNESTED -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-assume-no-nested-parallelism -DNESTED -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU-NESTED
+
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-assume-no-nested-parallelism -DNESTED -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-NESTED
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-assume-no-nested-parallelism -DNESTED -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -fopenmp-assume-no-nested-parallelism -DNESTED -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH-NESTED
+
+// expected-no-diagnostics
+
+#ifndef NESTED
+extern int omp_get_num_teams(void);
+#endif
+
+#ifndef HEADER
+#define HEADER
+extern int foo(int i);
+
+int N = 100000;
+int main()
+{
+ int a[N];
+ int b[N];
+
+#ifndef NESTED
+ // Should be transformed into 'target teams distribute parallel for'
+ #pragma omp target teams loop
+ for (int j = 0; j != N; j++)
+ a[j]=b[j];
+
+ // Should be transformed into 'target teams distribute parallel for'
+ #pragma omp target teams loop collapse(2)
+ for (int i = 0; i < N; i++) {
+ for (int j = 0; j < N; j++) {
+ a[i] = b[i] * N + j;
+ }
+ }
+
+ int nt = 0;
+ // Should be transformed into 'target teams distribute parallel for'
+ #pragma omp target teams loop num_teams(32)
+ for (int i=0; i < N; i++) {
+ if (!nt) nt = omp_get_num_teams();
+ for (int j=0; j < N; j++)
+ a[j] = b[j] * N + nt;
+ }
+#else
+ // Should be transformed into 'target teams distribute parallel for'
+ // even with function call because of assume-no-nested-parallelism.
+ #pragma omp target teams loop collapse(2)
+ for (int i = 0; i < N; i++) {
+ for (int j = 0; j < N; j++) {
+ a[i] = b[i] * N + foo(j);
+ }
+ }
+#endif
+ return 0;
+}
+#endif
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
+// IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment to ptr), ptr [[DYN_PTR]])
+// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
+// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// IR-GPU: user_code.entry:
+// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]]
+// IR-GPU-NEXT: call void @__kmpc_target_deinit()
+// IR-GPU-NEXT: ret void
+// IR-GPU: worker.exit:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined
+// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1:[0-9]+]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J5:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NEXT: [[J5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J5]] to ptr
+// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU: omp.precond.then:
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
+// IR-GPU-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-GPU-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-GPU: cond.true:
+// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END:%.*]]
+// IR-GPU: cond.false:
+// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END]]
+// IR-GPU: cond.end:
+// IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU: omp.inner.for.cond:
+// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1
+// IR-GPU-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]]
+// IR-GPU-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU: omp.inner.for.body:
+// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
+// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP21]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// IR-GPU-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
+// IR-GPU-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// IR-GPU-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
+// IR-GPU-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// IR-GPU-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP22]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8
+// IR-GPU-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
+// IR-GPU-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP0]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8
+// IR-GPU-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
+// IR-GPU-NEXT: store ptr [[TMP1]], ptr [[TMP31]], align 8
+// IR-GPU-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5
+// IR-GPU-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP33]], ptr [[TMP32]], align 8
+// IR-GPU-NEXT: [[TMP34:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6
+// IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP34]], align 8
+// IR-GPU-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
+// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP36]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 7)
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU: omp.inner.for.inc:
+// IR-GPU-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
+// IR-GPU-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]]
+// IR-GPU-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP41]], [[TMP42]]
+// IR-GPU-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]]
+// IR-GPU-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
+// IR-GPU: cond.true12:
+// IR-GPU-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END14:%.*]]
+// IR-GPU: cond.false13:
+// IR-GPU-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END14]]
+// IR-GPU: cond.end14:
+// IR-GPU-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE12]] ], [ [[TMP46]], [[COND_FALSE13]] ]
+// IR-GPU-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU: omp.inner.for.end:
+// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU: omp.loop.exit:
+// IR-GPU-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP49]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.end:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined_omp_outlined
+// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J6:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NEXT: [[J6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J6]] to ptr
+// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU: omp.precond.then:
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
+// IR-GPU-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU: omp.inner.for.cond:
+// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64
+// IR-GPU-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP14]]
+// IR-GPU-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU: omp.inner.for.body:
+// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
+// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-GPU-NEXT: store i32 [[ADD]], ptr [[J6_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[J6_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[J6_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]]
+// IR-GPU-NEXT: store i32 [[TMP17]], ptr [[ARRAYIDX10]], align 4
+// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-GPU: omp.body.continue:
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU: omp.inner.for.inc:
+// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
+// IR-GPU-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU: omp.inner.for.end:
+// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU: omp.loop.exit:
+// IR-GPU-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP22]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.end:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46
+// IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_kernel_environment to ptr), ptr [[DYN_PTR]])
+// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
+// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// IR-GPU: user_code.entry:
+// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2]]
+// IR-GPU-NEXT: call void @__kmpc_target_deinit()
+// IR-GPU-NEXT: ret void
+// IR-GPU: worker.exit:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined
+// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr
+// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr
+// IR-GPU-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr
+// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-GPU-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-GPU-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-GPU-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-GPU-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-GPU-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU: land.lhs.true:
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-GPU-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.then:
+// IR-GPU-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
+// IR-GPU-NEXT: [[CONV13:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64
+// IR-GPU-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// IR-GPU-NEXT: call void @__kmpc_distribute_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 [[CONV13]])
+// IR-GPU-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: [[CMP14:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
+// IR-GPU-NEXT: br i1 [[CMP14]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-GPU: cond.true:
+// IR-GPU-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[COND_END:%.*]]
+// IR-GPU: cond.false:
+// IR-GPU-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[COND_END]]
+// IR-GPU: cond.end:
+// IR-GPU-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
+// IR-GPU-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU: omp.inner.for.cond:
+// IR-GPU-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP19]], 1
+// IR-GPU-NEXT: [[CMP15:%.*]] = icmp slt i64 [[TMP18]], [[ADD]]
+// IR-GPU-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU: omp.inner.for.body:
+// IR-GPU-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP22]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// IR-GPU-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP20]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
+// IR-GPU-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// IR-GPU-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP21]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8
+// IR-GPU-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// IR-GPU-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP23]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8
+// IR-GPU-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
+// IR-GPU-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP0]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8
+// IR-GPU-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
+// IR-GPU-NEXT: store ptr [[TMP1]], ptr [[TMP32]], align 8
+// IR-GPU-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5
+// IR-GPU-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8
+// IR-GPU-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6
+// IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP35]], align 8
+// IR-GPU-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
+// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 7)
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU: omp.inner.for.inc:
+// IR-GPU-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]]
+// IR-GPU-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP40]], [[TMP41]]
+// IR-GPU-NEXT: store i64 [[ADD17]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP42]], [[TMP43]]
+// IR-GPU-NEXT: store i64 [[ADD18]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP44:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: [[CMP19:%.*]] = icmp sgt i64 [[TMP44]], [[TMP45]]
+// IR-GPU-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]]
+// IR-GPU: cond.true20:
+// IR-GPU-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[COND_END22:%.*]]
+// IR-GPU: cond.false21:
+// IR-GPU-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[COND_END22]]
+// IR-GPU: cond.end22:
+// IR-GPU-NEXT: [[COND23:%.*]] = phi i64 [ [[TMP46]], [[COND_TRUE20]] ], [ [[TMP47]], [[COND_FALSE21]] ]
+// IR-GPU-NEXT: store i64 [[COND23]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[TMP48]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU: omp.inner.for.end:
+// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU: omp.loop.exit:
+// IR-GPU-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP50]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.end:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined_omp_outlined
+// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr
+// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr
+// IR-GPU-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr
+// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-GPU-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-GPU-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-GPU-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-GPU-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-GPU-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU: land.lhs.true:
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-GPU-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.then:
+// IR-GPU-NEXT: store i64 0, ptr [[DOTOMP_LB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP14]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 1)
+// IR-GPU-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU: omp.inner.for.cond:
+// IR-GPU-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]]
+// IR-GPU-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU: omp.inner.for.body:
+// IR-GPU-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP19]], 0
+// IR-GPU-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
+// IR-GPU-NEXT: [[MUL16:%.*]] = mul nsw i32 1, [[DIV15]]
+// IR-GPU-NEXT: [[CONV17:%.*]] = sext i32 [[MUL16]] to i64
+// IR-GPU-NEXT: [[DIV18:%.*]] = sdiv i64 [[TMP18]], [[CONV17]]
+// IR-GPU-NEXT: [[MUL19:%.*]] = mul nsw i64 [[DIV18]], 1
+// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL19]]
+// IR-GPU-NEXT: [[CONV20:%.*]] = trunc i64 [[ADD]] to i32
+// IR-GPU-NEXT: store i32 [[CONV20]], ptr [[I11_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP22]], 0
+// IR-GPU-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1
+// IR-GPU-NEXT: [[MUL23:%.*]] = mul nsw i32 1, [[DIV22]]
+// IR-GPU-NEXT: [[CONV24:%.*]] = sext i32 [[MUL23]] to i64
+// IR-GPU-NEXT: [[DIV25:%.*]] = sdiv i64 [[TMP21]], [[CONV24]]
+// IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP23]], 0
+// IR-GPU-NEXT: [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
+// IR-GPU-NEXT: [[MUL28:%.*]] = mul nsw i32 1, [[DIV27]]
+// IR-GPU-NEXT: [[CONV29:%.*]] = sext i32 [[MUL28]] to i64
+// IR-GPU-NEXT: [[MUL30:%.*]] = mul nsw i64 [[DIV25]], [[CONV29]]
+// IR-GPU-NEXT: [[SUB31:%.*]] = sub nsw i64 [[TMP20]], [[MUL30]]
+// IR-GPU-NEXT: [[MUL32:%.*]] = mul nsw i64 [[SUB31]], 1
+// IR-GPU-NEXT: [[ADD33:%.*]] = add nsw i64 0, [[MUL32]]
+// IR-GPU-NEXT: [[CONV34:%.*]] = trunc i64 [[ADD33]] to i32
+// IR-GPU-NEXT: store i32 [[CONV34]], ptr [[J12_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[I11_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: [[MUL35:%.*]] = mul nsw i32 [[TMP25]], [[TMP26]]
+// IR-GPU-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD36:%.*]] = add nsw i32 [[MUL35]], [[TMP27]]
+// IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM37:%.*]] = sext i32 [[TMP28]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM37]]
+// IR-GPU-NEXT: store i32 [[ADD36]], ptr [[ARRAYIDX38]], align 4
+// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-GPU: omp.body.continue:
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU: omp.inner.for.inc:
+// IR-GPU-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP29]], [[TMP30]]
+// IR-GPU-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU: omp.inner.for.end:
+// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU: omp.loop.exit:
+// IR-GPU-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP32]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.end:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55
+// IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR4:[0-9]+]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[NT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NEXT: [[NT_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_CASTED]] to ptr
+// IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[NT]], ptr [[NT_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_kernel_environment to ptr), ptr [[DYN_PTR]])
+// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
+// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// IR-GPU: user_code.entry:
+// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP8]], ptr [[NT_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i64, ptr [[NT_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2]]
+// IR-GPU-NEXT: call void @__kmpc_target_deinit()
+// IR-GPU-NEXT: ret void
+// IR-GPU: worker.exit:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined
+// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I5:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x ptr], align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[NT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NEXT: [[I5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I5]] to ptr
+// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NEXT: [[NT_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_CASTED]] to ptr
+// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[NT]], ptr [[NT_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU: omp.precond.then:
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
+// IR-GPU-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-GPU-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-GPU: cond.true:
+// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END:%.*]]
+// IR-GPU: cond.false:
+// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END]]
+// IR-GPU: cond.end:
+// IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU: omp.inner.for.cond:
+// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1
+// IR-GPU-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]]
+// IR-GPU-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU: omp.inner.for.body:
+// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
+// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP21]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP23]], ptr [[NT_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP24:%.*]] = load i64, ptr [[NT_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP25:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// IR-GPU-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP18]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
+// IR-GPU-NEXT: [[TMP27:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// IR-GPU-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP20]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8
+// IR-GPU-NEXT: [[TMP29:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// IR-GPU-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP22]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8
+// IR-GPU-NEXT: [[TMP31:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
+// IR-GPU-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP24]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP32]], ptr [[TMP31]], align 8
+// IR-GPU-NEXT: [[TMP33:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
+// IR-GPU-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP0]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8
+// IR-GPU-NEXT: [[TMP35:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5
+// IR-GPU-NEXT: store ptr [[TMP1]], ptr [[TMP35]], align 8
+// IR-GPU-NEXT: [[TMP36:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6
+// IR-GPU-NEXT: [[TMP37:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP37]], ptr [[TMP36]], align 8
+// IR-GPU-NEXT: [[TMP38:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 7
+// IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP38]], align 8
+// IR-GPU-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4
+// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP40]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 8)
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU: omp.inner.for.inc:
+// IR-GPU-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP41]], [[TMP42]]
+// IR-GPU-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP43]], [[TMP44]]
+// IR-GPU-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP45]], [[TMP46]]
+// IR-GPU-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP47]], [[TMP48]]
+// IR-GPU-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
+// IR-GPU: cond.true12:
+// IR-GPU-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END14:%.*]]
+// IR-GPU: cond.false13:
+// IR-GPU-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END14]]
+// IR-GPU: cond.end14:
+// IR-GPU-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP49]], [[COND_TRUE12]] ], [ [[TMP50]], [[COND_FALSE13]] ]
+// IR-GPU-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP51]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU: omp.inner.for.end:
+// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU: omp.loop.exit:
+// IR-GPU-NEXT: [[TMP52:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP52]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP53]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.end:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined_omp_outlined
+// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I6:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[NT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NEXT: [[I6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I6]] to ptr
+// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[NT]], ptr [[NT_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU: omp.precond.then:
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
+// IR-GPU-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU: omp.inner.for.cond:
+// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64
+// IR-GPU-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP14]]
+// IR-GPU-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU: omp.inner.for.body:
+// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
+// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-GPU-NEXT: store i32 [[ADD]], ptr [[I6_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0
+// IR-GPU-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+// IR-GPU: if.then:
+// IR-GPU-NEXT: [[CALL:%.*]] = call noundef i32 @_Z17omp_get_num_teamsv() #[[ATTR6:[0-9]+]]
+// IR-GPU-NEXT: store i32 [[CALL]], ptr [[NT_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[IF_END]]
+// IR-GPU: if.end:
+// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[FOR_COND:%.*]]
+// IR-GPU: for.cond:
+// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+// IR-GPU-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// IR-GPU: for.body:
+// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-GPU-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
+// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL10]], [[TMP22]]
+// IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP23]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM12]]
+// IR-GPU-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX13]], align 4
+// IR-GPU-NEXT: br label [[FOR_INC:%.*]]
+// IR-GPU: for.inc:
+// IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP24]], 1
+// IR-GPU-NEXT: store i32 [[INC]], ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
+// IR-GPU: for.end:
+// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-GPU: omp.body.continue:
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU: omp.inner.for.inc:
+// IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP26]]
+// IR-GPU-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU: omp.inner.for.end:
+// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU: omp.loop.exit:
+// IR-GPU-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP28]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.end:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-LABEL: define {{[^@]+}}@main
+// IR-SAME: () #[[ATTR0:[0-9]+]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// IR-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// IR-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: [[N_CASTED2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[NT:%.*]] = alloca i32, align 4
+// IR-NEXT: [[N_CASTED3:%.*]] = alloca i64, align 8
+// IR-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4
+// IR-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+// IR-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
+// IR-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
+// IR-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16
+// IR-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4
+// IR-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+// IR-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16
+// IR-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4
+// IR-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]]
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr @N, align 4
+// IR-NEXT: store i32 [[TMP7]], ptr [[N_CASTED2]], align 4
+// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[N_CASTED2]], align 8
+// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46(i64 [[TMP8]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]]
+// IR-NEXT: store i32 0, ptr [[NT]], align 4
+// IR-NEXT: [[TMP9:%.*]] = load i32, ptr @N, align 4
+// IR-NEXT: store i32 [[TMP9]], ptr [[N_CASTED3]], align 4
+// IR-NEXT: [[TMP10:%.*]] = load i64, ptr [[N_CASTED3]], align 8
+// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[NT]], align 4
+// IR-NEXT: store i32 [[TMP11]], ptr [[NT_CASTED]], align 4
+// IR-NEXT: [[TMP12:%.*]] = load i64, ptr [[NT_CASTED]], align 8
+// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]]
+// IR-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// IR-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP13]])
+// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4
+// IR-NEXT: ret i32 [[TMP14]]
+//
+//
+// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
+// IR-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-NEXT: ret void
+//
+//
+// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined
+// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J5:%.*]] = alloca i32, align 4
+// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 0, ptr [[J]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR: omp.precond.then:
+// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR: cond.true:
+// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: br label [[COND_END:%.*]]
+// IR: cond.false:
+// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: br label [[COND_END]]
+// IR: cond.end:
+// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR: omp.inner.for.cond:
+// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// IR-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR: omp.inner.for.body:
+// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
+// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
+// IR-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR: omp.inner.for.inc:
+// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
+// IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR: omp.inner.for.end:
+// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR: omp.loop.exit:
+// IR-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
+// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
+// IR-NEXT: br label [[OMP_PRECOND_END]]
+// IR: omp.precond.end:
+// IR-NEXT: ret void
+//
+//
+// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined
+// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J6:%.*]] = alloca i32, align 4
+// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 0, ptr [[J]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR: omp.precond.then:
+// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
+// IR-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
+// IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
+// IR-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
+// IR-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR: cond.true:
+// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: br label [[COND_END:%.*]]
+// IR: cond.false:
+// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: br label [[COND_END]]
+// IR: cond.end:
+// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
+// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// IR-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR: omp.inner.for.cond:
+// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
+// IR-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR: omp.inner.for.body:
+// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1
+// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-NEXT: store i32 [[ADD]], ptr [[J6]], align 4
+// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[J6]], align 4
+// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64
+// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[J6]], align 4
+// IR-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP22]] to i64
+// IR-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]]
+// IR-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX10]], align 4
+// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR: omp.body.continue:
+// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR: omp.inner.for.inc:
+// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP23]], 1
+// IR-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR: omp.inner.for.end:
+// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR: omp.loop.exit:
+// IR-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
+// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]])
+// IR-NEXT: br label [[OMP_PRECOND_END]]
+// IR: omp.precond.end:
+// IR-NEXT: ret void
+//
+//
+// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46
+// IR-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-NEXT: ret void
+//
+//
+// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined
+// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
+// IR-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
+// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
+// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-NEXT: [[I11:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J12:%.*]] = alloca i32, align 4
+// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NEXT: store i32 0, ptr [[I]], align 4
+// IR-NEXT: store i32 0, ptr [[J]], align 4
+// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR: land.lhs.true:
+// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR: omp.precond.then:
+// IR-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8
+// IR-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// IR-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
+// IR-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR: cond.true:
+// IR-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NEXT: br label [[COND_END:%.*]]
+// IR: cond.false:
+// IR-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-NEXT: br label [[COND_END]]
+// IR: cond.end:
+// IR-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
+// IR-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
+// IR-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8
+// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR: omp.inner.for.cond:
+// IR-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]]
+// IR-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR: omp.inner.for.body:
+// IR-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4
+// IR-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR: omp.inner.for.inc:
+// IR-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// IR-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]]
+// IR-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
+// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR: omp.inner.for.end:
+// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR: omp.loop.exit:
+// IR-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]])
+// IR-NEXT: br label [[OMP_PRECOND_END]]
+// IR: omp.precond.end:
+// IR-NEXT: ret void
+//
+//
+// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined
+// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
+// IR-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
+// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
+// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-NEXT: [[I11:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J12:%.*]] = alloca i32, align 4
+// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NEXT: store i32 0, ptr [[I]], align 4
+// IR-NEXT: store i32 0, ptr [[J]], align 4
+// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR: land.lhs.true:
+// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR: omp.precond.then:
+// IR-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
+// IR-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8
+// IR-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8
+// IR-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8
+// IR-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// IR-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]]
+// IR-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR: cond.true:
+// IR-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NEXT: br label [[COND_END:%.*]]
+// IR: cond.false:
+// IR-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-NEXT: br label [[COND_END]]
+// IR: cond.end:
+// IR-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
+// IR-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
+// IR-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
+// IR-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8
+// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR: omp.inner.for.cond:
+// IR-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]]
+// IR-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR: omp.inner.for.body:
+// IR-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0
+// IR-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// IR-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
+// IR-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
+// IR-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]]
+// IR-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
+// IR-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
+// IR-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
+// IR-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
+// IR-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0
+// IR-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
+// IR-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
+// IR-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
+// IR-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]]
+// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0
+// IR-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// IR-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
+// IR-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
+// IR-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
+// IR-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]]
+// IR-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
+// IR-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
+// IR-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
+// IR-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
+// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4
+// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64
+// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]]
+// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4
+// IR-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[TMP31]]
+// IR-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4
+// IR-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64
+// IR-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]]
+// IR-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4
+// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR: omp.body.continue:
+// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR: omp.inner.for.inc:
+// IR-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1
+// IR-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8
+// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR: omp.inner.for.end:
+// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR: omp.loop.exit:
+// IR-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]])
+// IR-NEXT: br label [[OMP_PRECOND_END]]
+// IR: omp.precond.end:
+// IR-NEXT: ret void
+//
+//
+// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55
+// IR-SAME: (i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 32, i32 0)
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[NT_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP7]], ptr [[NT_CASTED]], align 4
+// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[NT_CASTED]], align 8
+// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined, i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP3]], ptr [[TMP4]])
+// IR-NEXT: ret void
+//
+//
+// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined
+// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-NEXT: [[I5:%.*]] = alloca i32, align 4
+// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 0, ptr [[I]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR: omp.precond.then:
+// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR: cond.true:
+// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: br label [[COND_END:%.*]]
+// IR: cond.false:
+// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: br label [[COND_END]]
+// IR: cond.end:
+// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR: omp.inner.for.cond:
+// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// IR-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR: omp.inner.for.body:
+// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
+// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
+// IR-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[NT_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP23]], ptr [[NT_CASTED]], align 4
+// IR-NEXT: [[TMP24:%.*]] = load i64, ptr [[NT_CASTED]], align 8
+// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 8, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR: omp.inner.for.inc:
+// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]]
+// IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR: omp.inner.for.end:
+// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR: omp.loop.exit:
+// IR-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
+// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]])
+// IR-NEXT: br label [[OMP_PRECOND_END]]
+// IR: omp.precond.end:
+// IR-NEXT: ret void
+//
+//
+// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined
+// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-NEXT: [[I6:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 0, ptr [[I]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR: omp.precond.then:
+// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
+// IR-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
+// IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
+// IR-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
+// IR-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR: cond.true:
+// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: br label [[COND_END:%.*]]
+// IR: cond.false:
+// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: br label [[COND_END]]
+// IR: cond.end:
+// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
+// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// IR-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR: omp.inner.for.cond:
+// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
+// IR-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR: omp.inner.for.body:
+// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1
+// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-NEXT: store i32 [[ADD]], ptr [[I6]], align 4
+// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[NT_ADDR]], align 4
+// IR-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0
+// IR-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+// IR: if.then:
+// IR-NEXT: [[CALL:%.*]] = call noundef i32 @_Z17omp_get_num_teamsv()
+// IR-NEXT: store i32 [[CALL]], ptr [[NT_ADDR]], align 4
+// IR-NEXT: br label [[IF_END]]
+// IR: if.end:
+// IR-NEXT: store i32 0, ptr [[J]], align 4
+// IR-NEXT: br label [[FOR_COND:%.*]]
+// IR: for.cond:
+// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[J]], align 4
+// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP21]], [[TMP22]]
+// IR-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// IR: for.body:
+// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[J]], align 4
+// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64
+// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP24]], [[TMP25]]
+// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[NT_ADDR]], align 4
+// IR-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL10]], [[TMP26]]
+// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[J]], align 4
+// IR-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64
+// IR-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM12]]
+// IR-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX13]], align 4
+// IR-NEXT: br label [[FOR_INC:%.*]]
+// IR: for.inc:
+// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[J]], align 4
+// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP28]], 1
+// IR-NEXT: store i32 [[INC]], ptr [[J]], align 4
+// IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// IR: for.end:
+// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR: omp.body.continue:
+// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR: omp.inner.for.inc:
+// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1
+// IR-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR: omp.inner.for.end:
+// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR: omp.loop.exit:
+// IR-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]])
+// IR-NEXT: br label [[OMP_PRECOND_END]]
+// IR: omp.precond.end:
+// IR-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@main
+// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[N_CASTED2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[NT:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[N_CASTED3:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+// IR-PCH-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
+// IR-PCH-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
+// IR-PCH-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16
+// IR-PCH-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+// IR-PCH-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16
+// IR-PCH-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]]
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[N_CASTED2]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[N_CASTED2]], align 8
+// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46(i64 [[TMP8]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]]
+// IR-PCH-NEXT: store i32 0, ptr [[NT]], align 4
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NEXT: store i32 [[TMP9]], ptr [[N_CASTED3]], align 4
+// IR-PCH-NEXT: [[TMP10:%.*]] = load i64, ptr [[N_CASTED3]], align 8
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[NT]], align 4
+// IR-PCH-NEXT: store i32 [[TMP11]], ptr [[NT_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i64, ptr [[NT_CASTED]], align 8
+// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]]
+// IR-PCH-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-PCH-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// IR-PCH-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP13]])
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4
+// IR-PCH-NEXT: ret i32 [[TMP14]]
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
+// IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J5:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-PCH-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// IR-PCH-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
+// IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J6:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
+// IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
+// IR-PCH-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
+// IR-PCH-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-PCH-NEXT: store i32 [[ADD]], ptr [[J6]], align 4
+// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[J6]], align 4
+// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[J6]], align 4
+// IR-PCH-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP22]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]]
+// IR-PCH-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX10]], align 4
+// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-PCH: omp.body.continue:
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP23]], 1
+// IR-PCH-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46
+// IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I11:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J12:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-PCH-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-PCH-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-PCH-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: land.lhs.true:
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-PCH-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-PCH-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
+// IR-PCH-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-PCH-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]]
+// IR-PCH-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]]
+// IR-PCH-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I11:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J12:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-PCH-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-PCH-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-PCH-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: land.lhs.true:
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-PCH-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
+// IR-PCH-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8
+// IR-PCH-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]]
+// IR-PCH-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
+// IR-PCH-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]]
+// IR-PCH-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0
+// IR-PCH-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// IR-PCH-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
+// IR-PCH-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
+// IR-PCH-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]]
+// IR-PCH-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
+// IR-PCH-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
+// IR-PCH-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
+// IR-PCH-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0
+// IR-PCH-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
+// IR-PCH-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
+// IR-PCH-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
+// IR-PCH-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]]
+// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0
+// IR-PCH-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// IR-PCH-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
+// IR-PCH-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
+// IR-PCH-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
+// IR-PCH-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]]
+// IR-PCH-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
+// IR-PCH-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
+// IR-PCH-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
+// IR-PCH-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
+// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4
+// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-PCH-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]]
+// IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4
+// IR-PCH-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[TMP31]]
+// IR-PCH-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4
+// IR-PCH-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]]
+// IR-PCH-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4
+// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-PCH: omp.body.continue:
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1
+// IR-PCH-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55
+// IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 32, i32 0)
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[NT_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[NT_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[NT_CASTED]], align 8
+// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined, i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP3]], ptr [[TMP4]])
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I5:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-PCH-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// IR-PCH-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[NT_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP23]], ptr [[NT_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP24:%.*]] = load i64, ptr [[NT_CASTED]], align 8
+// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 8, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]]
+// IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I6:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
+// IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
+// IR-PCH-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
+// IR-PCH-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I6]], align 4
+// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[NT_ADDR]], align 4
+// IR-PCH-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0
+// IR-PCH-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+// IR-PCH: if.then:
+// IR-PCH-NEXT: [[CALL:%.*]] = call noundef i32 @_Z17omp_get_num_teamsv()
+// IR-PCH-NEXT: store i32 [[CALL]], ptr [[NT_ADDR]], align 4
+// IR-PCH-NEXT: br label [[IF_END]]
+// IR-PCH: if.end:
+// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NEXT: br label [[FOR_COND:%.*]]
+// IR-PCH: for.cond:
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[J]], align 4
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP21]], [[TMP22]]
+// IR-PCH-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// IR-PCH: for.body:
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[J]], align 4
+// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP24]], [[TMP25]]
+// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[NT_ADDR]], align 4
+// IR-PCH-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL10]], [[TMP26]]
+// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[J]], align 4
+// IR-PCH-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM12]]
+// IR-PCH-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX13]], align 4
+// IR-PCH-NEXT: br label [[FOR_INC:%.*]]
+// IR-PCH: for.inc:
+// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[J]], align 4
+// IR-PCH-NEXT: [[INC:%.*]] = add nsw i32 [[TMP28]], 1
+// IR-PCH-NEXT: store i32 [[INC]], ptr [[J]], align 4
+// IR-PCH-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// IR-PCH: for.end:
+// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-PCH: omp.body.continue:
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1
+// IR-PCH-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-GPU-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64
+// IR-GPU-NESTED-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// IR-GPU-NESTED-NEXT: entry:
+// IR-GPU-NESTED-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NESTED-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment to ptr), ptr [[DYN_PTR]])
+// IR-GPU-NESTED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
+// IR-GPU-NESTED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// IR-GPU-NESTED: user_code.entry:
+// IR-GPU-NESTED-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// IR-GPU-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]]
+// IR-GPU-NESTED-NEXT: call void @__kmpc_target_deinit()
+// IR-GPU-NESTED-NEXT: ret void
+// IR-GPU-NESTED: worker.exit:
+// IR-GPU-NESTED-NEXT: ret void
+//
+//
+// IR-GPU-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined
+// IR-GPU-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1:[0-9]+]] {
+// IR-GPU-NESTED-NEXT: entry:
+// IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NESTED-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NESTED-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NESTED-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr
+// IR-GPU-NESTED-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// IR-GPU-NESTED-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NESTED-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr
+// IR-GPU-NESTED-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr
+// IR-GPU-NESTED-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NESTED-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-GPU-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-GPU-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-GPU-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-GPU-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-GPU-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-GPU-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-GPU-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-GPU-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU-NESTED: land.lhs.true:
+// IR-GPU-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-GPU-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-GPU-NESTED: omp.precond.then:
+// IR-GPU-NESTED-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
+// IR-GPU-NESTED-NEXT: [[CONV13:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64
+// IR-GPU-NESTED-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// IR-GPU-NESTED-NEXT: call void @__kmpc_distribute_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 [[CONV13]])
+// IR-GPU-NESTED-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[CMP14:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
+// IR-GPU-NESTED-NEXT: br i1 [[CMP14]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-GPU-NESTED: cond.true:
+// IR-GPU-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: br label [[COND_END:%.*]]
+// IR-GPU-NESTED: cond.false:
+// IR-GPU-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: br label [[COND_END]]
+// IR-GPU-NESTED: cond.end:
+// IR-GPU-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
+// IR-GPU-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU-NESTED: omp.inner.for.cond:
+// IR-GPU-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP19]], 1
+// IR-GPU-NESTED-NEXT: [[CMP15:%.*]] = icmp slt i64 [[TMP18]], [[ADD]]
+// IR-GPU-NESTED-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU-NESTED: omp.inner.for.body:
+// IR-GPU-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 [[TMP22]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// IR-GPU-NESTED-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP20]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// IR-GPU-NESTED-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP21]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// IR-GPU-NESTED-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP23]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
+// IR-GPU-NESTED-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP0]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
+// IR-GPU-NESTED-NEXT: store ptr [[TMP1]], ptr [[TMP32]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5
+// IR-GPU-NESTED-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6
+// IR-GPU-NESTED-NEXT: store ptr [[TMP3]], ptr [[TMP35]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
+// IR-GPU-NESTED-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 7)
+// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU-NESTED: omp.inner.for.inc:
+// IR-GPU-NESTED-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]]
+// IR-GPU-NESTED-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP40]], [[TMP41]]
+// IR-GPU-NESTED-NEXT: store i64 [[ADD17]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP42]], [[TMP43]]
+// IR-GPU-NESTED-NEXT: store i64 [[ADD18]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP44:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[CMP19:%.*]] = icmp sgt i64 [[TMP44]], [[TMP45]]
+// IR-GPU-NESTED-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]]
+// IR-GPU-NESTED: cond.true20:
+// IR-GPU-NESTED-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: br label [[COND_END22:%.*]]
+// IR-GPU-NESTED: cond.false21:
+// IR-GPU-NESTED-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: br label [[COND_END22]]
+// IR-GPU-NESTED: cond.end22:
+// IR-GPU-NESTED-NEXT: [[COND23:%.*]] = phi i64 [ [[TMP46]], [[COND_TRUE20]] ], [ [[TMP47]], [[COND_FALSE21]] ]
+// IR-GPU-NESTED-NEXT: store i64 [[COND23]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[TMP48]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU-NESTED: omp.inner.for.end:
+// IR-GPU-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU-NESTED: omp.loop.exit:
+// IR-GPU-NESTED-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4
+// IR-GPU-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP50]])
+// IR-GPU-NESTED-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU-NESTED: omp.precond.end:
+// IR-GPU-NESTED-NEXT: ret void
+//
+//
+// IR-GPU-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined_omp_outlined
+// IR-GPU-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
+// IR-GPU-NESTED-NEXT: entry:
+// IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NESTED-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NESTED-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NESTED-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr
+// IR-GPU-NESTED-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// IR-GPU-NESTED-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NESTED-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr
+// IR-GPU-NESTED-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-GPU-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-GPU-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-GPU-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-GPU-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-GPU-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-GPU-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-GPU-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-GPU-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU-NESTED: land.lhs.true:
+// IR-GPU-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-GPU-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-GPU-NESTED: omp.precond.then:
+// IR-GPU-NESTED-NEXT: store i64 0, ptr [[DOTOMP_LB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// IR-GPU-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP14]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 1)
+// IR-GPU-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU-NESTED: omp.inner.for.cond:
+// IR-GPU-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]]
+// IR-GPU-NESTED-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU-NESTED: omp.inner.for.body:
+// IR-GPU-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP19]], 0
+// IR-GPU-NESTED-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
+// IR-GPU-NESTED-NEXT: [[MUL16:%.*]] = mul nsw i32 1, [[DIV15]]
+// IR-GPU-NESTED-NEXT: [[CONV17:%.*]] = sext i32 [[MUL16]] to i64
+// IR-GPU-NESTED-NEXT: [[DIV18:%.*]] = sdiv i64 [[TMP18]], [[CONV17]]
+// IR-GPU-NESTED-NEXT: [[MUL19:%.*]] = mul nsw i64 [[DIV18]], 1
+// IR-GPU-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL19]]
+// IR-GPU-NESTED-NEXT: [[CONV20:%.*]] = trunc i64 [[ADD]] to i32
+// IR-GPU-NESTED-NEXT: store i32 [[CONV20]], ptr [[I11_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP22]], 0
+// IR-GPU-NESTED-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1
+// IR-GPU-NESTED-NEXT: [[MUL23:%.*]] = mul nsw i32 1, [[DIV22]]
+// IR-GPU-NESTED-NEXT: [[CONV24:%.*]] = sext i32 [[MUL23]] to i64
+// IR-GPU-NESTED-NEXT: [[DIV25:%.*]] = sdiv i64 [[TMP21]], [[CONV24]]
+// IR-GPU-NESTED-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP23]], 0
+// IR-GPU-NESTED-NEXT: [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
+// IR-GPU-NESTED-NEXT: [[MUL28:%.*]] = mul nsw i32 1, [[DIV27]]
+// IR-GPU-NESTED-NEXT: [[CONV29:%.*]] = sext i32 [[MUL28]] to i64
+// IR-GPU-NESTED-NEXT: [[MUL30:%.*]] = mul nsw i64 [[DIV25]], [[CONV29]]
+// IR-GPU-NESTED-NEXT: [[SUB31:%.*]] = sub nsw i64 [[TMP20]], [[MUL30]]
+// IR-GPU-NESTED-NEXT: [[MUL32:%.*]] = mul nsw i64 [[SUB31]], 1
+// IR-GPU-NESTED-NEXT: [[ADD33:%.*]] = add nsw i64 0, [[MUL32]]
+// IR-GPU-NESTED-NEXT: [[CONV34:%.*]] = trunc i64 [[ADD33]] to i32
+// IR-GPU-NESTED-NEXT: store i32 [[CONV34]], ptr [[J12_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP24:%.*]] = load i32, ptr [[I11_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64
+// IR-GPU-NESTED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-GPU-NESTED-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP26:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[MUL35:%.*]] = mul nsw i32 [[TMP25]], [[TMP26]]
+// IR-GPU-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP27]]) #[[ATTR5:[0-9]+]]
+// IR-GPU-NESTED-NEXT: [[ADD36:%.*]] = add nsw i32 [[MUL35]], [[CALL]]
+// IR-GPU-NESTED-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[IDXPROM37:%.*]] = sext i32 [[TMP28]] to i64
+// IR-GPU-NESTED-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM37]]
+// IR-GPU-NESTED-NEXT: store i32 [[ADD36]], ptr [[ARRAYIDX38]], align 4
+// IR-GPU-NESTED-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-GPU-NESTED: omp.body.continue:
+// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU-NESTED: omp.inner.for.inc:
+// IR-GPU-NESTED-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP29]], [[TMP30]]
+// IR-GPU-NESTED-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU-NESTED: omp.inner.for.end:
+// IR-GPU-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU-NESTED: omp.loop.exit:
+// IR-GPU-NESTED-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// IR-GPU-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP32]])
+// IR-GPU-NESTED-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU-NESTED: omp.precond.end:
+// IR-GPU-NESTED-NEXT: ret void
+//
+//
+// IR-NESTED-LABEL: define {{[^@]+}}@main
+// IR-NESTED-SAME: () #[[ATTR0:[0-9]+]] {
+// IR-NESTED-NEXT: entry:
+// IR-NESTED-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
+// IR-NESTED-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-NESTED-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4
+// IR-NESTED-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+// IR-NESTED-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
+// IR-NESTED-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
+// IR-NESTED-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16
+// IR-NESTED-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
+// IR-NESTED-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4
+// IR-NESTED-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+// IR-NESTED-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16
+// IR-NESTED-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
+// IR-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4
+// IR-NESTED-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
+// IR-NESTED-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NESTED-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]]
+// IR-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-NESTED-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// IR-NESTED-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP7]])
+// IR-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4
+// IR-NESTED-NEXT: ret i32 [[TMP8]]
+//
+//
+// IR-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64
+// IR-NESTED-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] {
+// IR-NESTED-NEXT: entry:
+// IR-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NESTED-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
+// IR-NESTED-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-NESTED-NEXT: ret void
+//
+//
+// IR-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined
+// IR-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NESTED-NEXT: entry:
+// IR-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NESTED-NEXT: store i32 0, ptr [[I]], align 4
+// IR-NESTED-NEXT: store i32 0, ptr [[J]], align 4
+// IR-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-NESTED: land.lhs.true:
+// IR-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-NESTED: omp.precond.then:
+// IR-NESTED-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8
+// IR-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// IR-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NESTED-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// IR-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-NESTED-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-NESTED-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
+// IR-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-NESTED: cond.true:
+// IR-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NESTED-NEXT: br label [[COND_END:%.*]]
+// IR-NESTED: cond.false:
+// IR-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-NESTED-NEXT: br label [[COND_END]]
+// IR-NESTED: cond.end:
+// IR-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
+// IR-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
+// IR-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-NESTED-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-NESTED: omp.inner.for.cond:
+// IR-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]]
+// IR-NESTED-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-NESTED: omp.inner.for.body:
+// IR-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NESTED-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4
+// IR-NESTED-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-NESTED: omp.inner.for.inc:
+// IR-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// IR-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]]
+// IR-NESTED-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-NESTED: omp.inner.for.end:
+// IR-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-NESTED: omp.loop.exit:
+// IR-NESTED-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// IR-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]])
+// IR-NESTED-NEXT: br label [[OMP_PRECOND_END]]
+// IR-NESTED: omp.precond.end:
+// IR-NESTED-NEXT: ret void
+//
+//
+// IR-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined
+// IR-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NESTED-NEXT: entry:
+// IR-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// IR-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4
+// IR-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NESTED-NEXT: store i32 0, ptr [[I]], align 4
+// IR-NESTED-NEXT: store i32 0, ptr [[J]], align 4
+// IR-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-NESTED: land.lhs.true:
+// IR-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-NESTED: omp.precond.then:
+// IR-NESTED-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
+// IR-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8
+// IR-NESTED-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-NESTED-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8
+// IR-NESTED-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8
+// IR-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// IR-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NESTED-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// IR-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]]
+// IR-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-NESTED: cond.true:
+// IR-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-NESTED-NEXT: br label [[COND_END:%.*]]
+// IR-NESTED: cond.false:
+// IR-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-NESTED-NEXT: br label [[COND_END]]
+// IR-NESTED: cond.end:
+// IR-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
+// IR-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
+// IR-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
+// IR-NESTED-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-NESTED: omp.inner.for.cond:
+// IR-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]]
+// IR-NESTED-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-NESTED: omp.inner.for.body:
+// IR-NESTED-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0
+// IR-NESTED-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// IR-NESTED-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
+// IR-NESTED-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
+// IR-NESTED-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]]
+// IR-NESTED-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
+// IR-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
+// IR-NESTED-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
+// IR-NESTED-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
+// IR-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0
+// IR-NESTED-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
+// IR-NESTED-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
+// IR-NESTED-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
+// IR-NESTED-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]]
+// IR-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-NESTED-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0
+// IR-NESTED-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// IR-NESTED-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
+// IR-NESTED-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
+// IR-NESTED-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
+// IR-NESTED-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]]
+// IR-NESTED-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
+// IR-NESTED-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
+// IR-NESTED-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
+// IR-NESTED-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
+// IR-NESTED-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4
+// IR-NESTED-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64
+// IR-NESTED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-NESTED-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-NESTED-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NESTED-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]]
+// IR-NESTED-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4
+// IR-NESTED-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP31]])
+// IR-NESTED-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[CALL]]
+// IR-NESTED-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4
+// IR-NESTED-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64
+// IR-NESTED-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]]
+// IR-NESTED-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4
+// IR-NESTED-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-NESTED: omp.body.continue:
+// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-NESTED: omp.inner.for.inc:
+// IR-NESTED-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1
+// IR-NESTED-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8
+// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-NESTED: omp.inner.for.end:
+// IR-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-NESTED: omp.loop.exit:
+// IR-NESTED-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NESTED-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// IR-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]])
+// IR-NESTED-NEXT: br label [[OMP_PRECOND_END]]
+// IR-NESTED: omp.precond.end:
+// IR-NESTED-NEXT: ret void
+//
+//
+// IR-PCH-NESTED-LABEL: define {{[^@]+}}@main
+// IR-PCH-NESTED-SAME: () #[[ATTR0:[0-9]+]] {
+// IR-PCH-NESTED-NEXT: entry:
+// IR-PCH-NESTED-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+// IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
+// IR-PCH-NESTED-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
+// IR-PCH-NESTED-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16
+// IR-PCH-NESTED-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+// IR-PCH-NESTED-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16
+// IR-PCH-NESTED-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4
+// IR-PCH-NESTED-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NESTED-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]]
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// IR-PCH-NESTED-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP7]])
+// IR-PCH-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4
+// IR-PCH-NESTED-NEXT: ret i32 [[TMP8]]
+//
+//
+// IR-PCH-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64
+// IR-PCH-NESTED-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] {
+// IR-PCH-NESTED-NEXT: entry:
+// IR-PCH-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NESTED-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NESTED-NEXT: ret void
+//
+//
+// IR-PCH-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined
+// IR-PCH-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NESTED-NEXT: entry:
+// IR-PCH-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-PCH-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-PCH-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-PCH-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-PCH-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-PCH-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-PCH-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-PCH-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH-NESTED: land.lhs.true:
+// IR-PCH-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-PCH-NESTED: omp.precond.then:
+// IR-PCH-NESTED-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-PCH-NESTED-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH-NESTED: cond.true:
+// IR-PCH-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: br label [[COND_END:%.*]]
+// IR-PCH-NESTED: cond.false:
+// IR-PCH-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NESTED-NEXT: br label [[COND_END]]
+// IR-PCH-NESTED: cond.end:
+// IR-PCH-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
+// IR-PCH-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH-NESTED: omp.inner.for.cond:
+// IR-PCH-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH-NESTED: omp.inner.for.body:
+// IR-PCH-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NESTED-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH-NESTED: omp.inner.for.inc:
+// IR-PCH-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// IR-PCH-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]]
+// IR-PCH-NESTED-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH-NESTED: omp.inner.for.end:
+// IR-PCH-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH-NESTED: omp.loop.exit:
+// IR-PCH-NESTED-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]])
+// IR-PCH-NESTED-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH-NESTED: omp.precond.end:
+// IR-PCH-NESTED-NEXT: ret void
+//
+//
+// IR-PCH-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined
+// IR-PCH-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NESTED-NEXT: entry:
+// IR-PCH-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// IR-PCH-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4
+// IR-PCH-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-PCH-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-PCH-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-PCH-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-PCH-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-PCH-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-PCH-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-PCH-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH-NESTED: land.lhs.true:
+// IR-PCH-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-PCH-NESTED: omp.precond.then:
+// IR-PCH-NESTED-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// IR-PCH-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-PCH-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH-NESTED: cond.true:
+// IR-PCH-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NESTED-NEXT: br label [[COND_END:%.*]]
+// IR-PCH-NESTED: cond.false:
+// IR-PCH-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NESTED-NEXT: br label [[COND_END]]
+// IR-PCH-NESTED: cond.end:
+// IR-PCH-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
+// IR-PCH-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
+// IR-PCH-NESTED-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH-NESTED: omp.inner.for.cond:
+// IR-PCH-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]]
+// IR-PCH-NESTED-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH-NESTED: omp.inner.for.body:
+// IR-PCH-NESTED-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0
+// IR-PCH-NESTED-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// IR-PCH-NESTED-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
+// IR-PCH-NESTED-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
+// IR-PCH-NESTED-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]]
+// IR-PCH-NESTED-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
+// IR-PCH-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
+// IR-PCH-NESTED-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
+// IR-PCH-NESTED-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0
+// IR-PCH-NESTED-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
+// IR-PCH-NESTED-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
+// IR-PCH-NESTED-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
+// IR-PCH-NESTED-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]]
+// IR-PCH-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NESTED-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0
+// IR-PCH-NESTED-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// IR-PCH-NESTED-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
+// IR-PCH-NESTED-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
+// IR-PCH-NESTED-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
+// IR-PCH-NESTED-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]]
+// IR-PCH-NESTED-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
+// IR-PCH-NESTED-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
+// IR-PCH-NESTED-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
+// IR-PCH-NESTED-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4
+// IR-PCH-NESTED-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64
+// IR-PCH-NESTED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-PCH-NESTED-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-PCH-NESTED-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NESTED-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]]
+// IR-PCH-NESTED-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4
+// IR-PCH-NESTED-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP31]])
+// IR-PCH-NESTED-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[CALL]]
+// IR-PCH-NESTED-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4
+// IR-PCH-NESTED-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64
+// IR-PCH-NESTED-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]]
+// IR-PCH-NESTED-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4
+// IR-PCH-NESTED-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-PCH-NESTED: omp.body.continue:
+// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH-NESTED: omp.inner.for.inc:
+// IR-PCH-NESTED-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1
+// IR-PCH-NESTED-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH-NESTED: omp.inner.for.end:
+// IR-PCH-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH-NESTED: omp.loop.exit:
+// IR-PCH-NESTED-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NESTED-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]])
+// IR-PCH-NESTED-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH-NESTED: omp.precond.end:
+// IR-PCH-NESTED-NEXT: ret void
+//
diff --git a/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp
index 0a6ae1ad405c26f..3fa20401fbc8c40 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp
@@ -239,7 +239,7 @@ int main (int argc, char **argv) {
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK1-NEXT: ret void
//
//
@@ -278,7 +278,7 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -331,7 +331,7 @@ int main (int argc, char **argv) {
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
// CHECK1-NEXT: ret void
//
//
@@ -480,7 +480,7 @@ int main (int argc, char **argv) {
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK3-NEXT: ret void
//
//
@@ -517,7 +517,7 @@ int main (int argc, char **argv) {
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087
// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -568,7 +568,7 @@ int main (int argc, char **argv) {
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
// CHECK3-NEXT: ret void
//
//
@@ -860,7 +860,7 @@ int main (int argc, char **argv) {
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP28]])
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]])
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.end:
// CHECK9-NEXT: ret void
@@ -940,7 +940,7 @@ int main (int argc, char **argv) {
// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]]
@@ -1014,7 +1014,7 @@ int main (int argc, char **argv) {
// CHECK9: omp.loop.exit:
// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]])
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]])
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.end:
// CHECK9-NEXT: ret void
@@ -1147,7 +1147,7 @@ int main (int argc, char **argv) {
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK9-NEXT: ret void
//
//
@@ -1238,7 +1238,7 @@ int main (int argc, char **argv) {
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
// CHECK9-NEXT: ret void
//
//
@@ -1531,7 +1531,7 @@ int main (int argc, char **argv) {
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP30]])
+// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]])
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
// CHECK11: omp.precond.end:
// CHECK11-NEXT: ret void
@@ -1613,7 +1613,7 @@ int main (int argc, char **argv) {
// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
-// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]]
@@ -1685,7 +1685,7 @@ int main (int argc, char **argv) {
// CHECK11: omp.loop.exit:
// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
-// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]])
+// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]])
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
// CHECK11: omp.precond.end:
// CHECK11-NEXT: ret void
@@ -1816,7 +1816,7 @@ int main (int argc, char **argv) {
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
-// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK11-NEXT: ret void
//
//
@@ -1903,7 +1903,7 @@ int main (int argc, char **argv) {
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
-// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
// CHECK11-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp
index 4291a405e4baf45..3c9f5d1dee66b67 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp
@@ -210,7 +210,7 @@ int main() {
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT: ret void
//
//
@@ -244,7 +244,7 @@ int main() {
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -280,7 +280,7 @@ int main() {
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
// CHECK1-NEXT: ret void
//
//
@@ -332,78 +332,8 @@ int main() {
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK1-NEXT: call void @_Z9gtid_testv()
@@ -411,14 +341,14 @@ int main() {
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT: ret void
//
//
@@ -586,78 +516,8 @@ int main() {
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK1-NEXT: call void @_Z3fn4v()
@@ -665,14 +525,14 @@ int main() {
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT: ret void
//
//
@@ -695,7 +555,6 @@ int main() {
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
@@ -725,82 +584,8 @@ int main() {
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
-// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK1-NEXT: call void @_Z3fn5v()
@@ -808,14 +593,14 @@ int main() {
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT: ret void
//
//
@@ -847,7 +632,6 @@ int main() {
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
@@ -878,117 +662,39 @@ int main() {
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
-// CHECK1: omp_if.then:
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_IF_END:%.*]]
-// CHECK1: omp_if.else:
-// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined.omp_outlined(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
-// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK1-NEXT: br label [[OMP_IF_END]]
-// CHECK1: omp_if.end:
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK1-NEXT: call void @_Z3fn6v()
+// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
+// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiEiT_
+// CHECK1-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0]] comdat {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: call void @_Z3fn6v()
-// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// CHECK1: omp.body.continue:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiEiT_
-// CHECK1-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0]] comdat {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT: store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT: store i32 2, ptr [[TMP0]], align 4
@@ -1026,44 +732,61 @@ int main() {
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64() #[[ATTR2]]
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
+// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1
+// CHECK1-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK1: omp_if.then:
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
-// CHECK1-NEXT: store i32 2, ptr [[TMP16]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
-// CHECK1-NEXT: store i32 0, ptr [[TMP17]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 2
-// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 3
-// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP20]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP19]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP20]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store ptr null, ptr [[TMP21]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 8
-// CHECK1-NEXT: store i64 100, ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 9
-// CHECK1-NEXT: store i64 0, ptr [[TMP25]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 10
-// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP26]], align 4
-// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 11
-// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP27]], align 4
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 12
-// CHECK1-NEXT: store i32 0, ptr [[TMP28]], align 4
-// CHECK1-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS2]])
-// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
-// CHECK1-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
-// CHECK1: omp_offload.failed3:
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68() #[[ATTR2]]
-// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT4]]
-// CHECK1: omp_offload.cont4:
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK1-NEXT: store i32 2, ptr [[TMP24]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP25]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP28]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP31]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK1-NEXT: store i64 100, ptr [[TMP32]], align 8
+// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CHECK1-NEXT: store i64 0, ptr [[TMP33]], align 8
+// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP35]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CHECK1-NEXT: store i32 0, ptr [[TMP36]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS5]])
+// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK1-NEXT: br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK1: omp_offload.failed6:
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68(i64 [[TMP17]]) #[[ATTR2]]
+// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT7]]
+// CHECK1: omp_offload.cont7:
// CHECK1-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.else:
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68() #[[ATTR2]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68(i64 [[TMP17]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: ret i32 0
@@ -1117,78 +840,8 @@ int main() {
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK1-NEXT: call void @_Z3fn1v()
@@ -1196,14 +849,14 @@ int main() {
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT: ret void
//
//
@@ -1226,7 +879,6 @@ int main() {
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
@@ -1256,82 +908,8 @@ int main() {
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64.omp_outlined.omp_outlined(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]]
-// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK1-NEXT: call void @_Z3fn2v()
@@ -1339,29 +917,38 @@ int main() {
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68
-// CHECK1-SAME: () #[[ATTR1]] {
+// CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
+// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined, i64 [[TMP1]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
@@ -1371,6 +958,7 @@ int main() {
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
@@ -1398,78 +986,8 @@ int main() {
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK1-NEXT: call void @_Z3fn3v()
@@ -1477,14 +995,14 @@ int main() {
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp
index 7cc148f4c4ee7cd..c4d3729051994d4 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp
@@ -326,7 +326,7 @@ int main() {
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.region_id, ptr [[KERNEL_ARGS]])
// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
@@ -340,7 +340,7 @@ int main() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124
// CHECK1-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined)
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined)
// CHECK1-NEXT: ret void
//
//
@@ -403,149 +403,48 @@ int main() {
// CHECK1: omp.inner.for.cond.cleanup:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]])
-// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2
-// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]]
-// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK1: arraydestroy.done3:
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4
-// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4
-// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2
-// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK1: arrayctor.loop:
-// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1
-// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK1: arrayctor.cont:
-// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK1: omp.inner.for.cond.cleanup:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64
-// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]]
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false)
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4
+// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64
+// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]]
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false)
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4
+// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
+// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]])
// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2
+// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2
// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]]
-// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK1: arraydestroy.done8:
+// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]]
+// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK1: arraydestroy.done7:
// CHECK1-NEXT: ret void
//
//
@@ -596,7 +495,7 @@ int main() {
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, ptr [[KERNEL_ARGS]])
// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
@@ -645,7 +544,7 @@ int main() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80
// CHECK1-SAME: () #[[ATTR4]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined)
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined)
// CHECK1-NEXT: ret void
//
//
@@ -711,149 +610,45 @@ int main() {
// CHECK1: omp.inner.for.cond.cleanup:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP14]])
-// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2
-// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]]
-// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK1: arraydestroy.done5:
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4
-// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4
-// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2
-// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK1: arrayctor.loop:
-// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1
-// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK1: arrayctor.cont:
-// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
-// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK1: omp.inner.for.cond.cleanup:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64
-// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]]
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false)
+// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64
+// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]]
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false)
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2
+// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2
// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]]
-// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK1: arraydestroy.done9:
+// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]]
+// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK1: arraydestroy.done8:
// CHECK1-NEXT: ret void
//
//
@@ -1066,7 +861,7 @@ int main() {
// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.region_id, ptr [[KERNEL_ARGS]])
// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
// CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
@@ -1080,7 +875,7 @@ int main() {
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124
// CHECK3-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined)
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined)
// CHECK3-NEXT: ret void
//
//
@@ -1143,138 +938,41 @@ int main() {
// CHECK3: omp.inner.for.cond.cleanup:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]])
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK3: omp.inner.for.end:
-// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]])
-// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2
-// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
-// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]]
-// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK3: arraydestroy.done3:
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4
-// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4
-// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2
-// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK3: arrayctor.loop:
-// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1
-// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK3: arrayctor.cont:
-// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
-// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK3: omp.inner.for.cond.cleanup:
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]]
-// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]]
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]]
+// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]]
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false)
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
+// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]])
// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
// CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2
// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]]
@@ -1330,7 +1028,7 @@ int main() {
// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, ptr [[KERNEL_ARGS]])
// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
// CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
@@ -1379,7 +1077,7 @@ int main() {
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80
// CHECK3-SAME: () #[[ATTR4]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined)
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined)
// CHECK3-NEXT: ret void
//
//
@@ -1445,138 +1143,38 @@ int main() {
// CHECK3: omp.inner.for.cond.cleanup:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]])
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK3: omp.inner.for.end:
-// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]])
-// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2
-// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
-// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]]
-// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK3: arraydestroy.done5:
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4
-// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4
-// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2
-// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK3: arrayctor.loop:
-// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1
-// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK3: arrayctor.cont:
-// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
-// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK3: omp.inner.for.cond.cleanup:
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]]
+// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]]
-// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]]
-// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false)
+// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]]
+// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false)
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1
// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
+// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2
// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]]
@@ -1774,7 +1372,7 @@ int main() {
// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104
// CHECK5-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK5-NEXT: entry:
-// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined)
+// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined)
// CHECK5-NEXT: ret void
//
//
@@ -1795,6 +1393,7 @@ int main() {
// CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
// CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8
@@ -1826,117 +1425,39 @@ int main() {
// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK5-NEXT: store i32 1, ptr [[G]], align 4
+// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK5-NEXT: store volatile i32 1, ptr [[TMP8]], align 4
+// CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4
+// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK5-NEXT: store ptr [[G]], ptr [[TMP9]], align 8
+// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK5-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
+// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2
+// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8
+// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]])
+// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK5: omp.loop.exit:
-// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
+// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK5-NEXT: ret void
//
//
-// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] {
-// CHECK5-NEXT: entry:
-// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
-// CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8
-// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK5-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8
-// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK5: cond.true:
-// CHECK5-NEXT: br label [[COND_END:%.*]]
-// CHECK5: cond.false:
-// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK5-NEXT: br label [[COND_END]]
-// CHECK5: cond.end:
-// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK5-NEXT: store i32 1, ptr [[G]], align 4
-// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK5-NEXT: store volatile i32 1, ptr [[TMP10]], align 4
-// CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4
-// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0
-// CHECK5-NEXT: store ptr [[G]], ptr [[TMP11]], align 8
-// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1
-// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2
-// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8
-// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]])
-// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// CHECK5: omp.body.continue:
-// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK5: omp.inner.for.end:
-// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK5: omp.loop.exit:
-// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
-// CHECK5-NEXT: ret void
-//
-//
-// CHECK5-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_teams_generic_loop_private_codegen.cpp
-// CHECK5-SAME: () #[[ATTR0]] {
+// CHECK5-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_teams_generic_loop_private_codegen.cpp
+// CHECK5-SAME: () #[[ATTR0]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: call void @__cxx_global_var_init()
// CHECK5-NEXT: call void @__cxx_global_var_init.1()
@@ -1956,7 +1477,7 @@ int main() {
// CHECK13-NEXT: entry:
// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined)
+// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined)
// CHECK13-NEXT: ret void
//
//
@@ -2019,35 +1540,48 @@ int main() {
// CHECK13: omp.inner.for.cond.cleanup:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
+// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
+// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
+// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]]
+// CHECK13-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK13-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64
+// CHECK13-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]]
+// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false)
+// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4
+// CHECK13-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
+// CHECK13-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4
+// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK13-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK13: omp.loop.exit:
-// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]])
+// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]])
// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]]
-// CHECK13-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2
+// CHECK13-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
+// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2
// CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK13: arraydestroy.body:
-// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]]
-// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]]
-// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK13: arraydestroy.done3:
+// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]]
+// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK13: arraydestroy.done7:
// CHECK13-NEXT: ret void
//
//
@@ -2061,120 +1595,6 @@ int main() {
// CHECK13-NEXT: ret void
//
//
-// CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] {
-// CHECK13-NEXT: entry:
-// CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4
-// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4
-// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2
-// CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK13: arrayctor.loop:
-// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]]
-// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1
-// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK13: arrayctor.cont:
-// CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]]
-// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK13: cond.true:
-// CHECK13-NEXT: br label [[COND_END:%.*]]
-// CHECK13: cond.false:
-// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK13-NEXT: br label [[COND_END]]
-// CHECK13: cond.end:
-// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK13: omp.inner.for.cond.cleanup:
-// CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
-// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
-// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]]
-// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4
-// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64
-// CHECK13-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]]
-// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false)
-// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4
-// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
-// CHECK13-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4
-// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// CHECK13: omp.body.continue:
-// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK13: omp.inner.for.end:
-// CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK13: omp.loop.exit:
-// CHECK13-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
-// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]]
-// CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2
-// CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK13: arraydestroy.body:
-// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]]
-// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]]
-// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK13: arraydestroy.done8:
-// CHECK13-NEXT: ret void
-//
-//
// CHECK13-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev
// CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat {
// CHECK13-NEXT: entry:
@@ -2190,7 +1610,7 @@ int main() {
// CHECK13-NEXT: entry:
// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined)
+// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined)
// CHECK13-NEXT: ret void
//
//
@@ -2256,35 +1676,45 @@ int main() {
// CHECK13: omp.inner.for.cond.cleanup:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
+// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
+// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
+// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]]
+// CHECK13-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK13-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64
+// CHECK13-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]]
+// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false)
+// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK13: omp.body.continue:
// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK13: omp.inner.for.end:
// CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK13: omp.loop.exit:
// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP14]])
+// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]]
-// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2
+// CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
+// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2
// CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK13: arraydestroy.body:
// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]]
-// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]]
-// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK13: arraydestroy.done5:
+// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]]
+// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK13: arraydestroy.done8:
// CHECK13-NEXT: ret void
//
//
@@ -2298,120 +1728,6 @@ int main() {
// CHECK13-NEXT: ret void
//
//
-// CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] {
-// CHECK13-NEXT: entry:
-// CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4
-// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4
-// CHECK13-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8
-// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK13-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2
-// CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK13: arrayctor.loop:
-// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]]
-// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1
-// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK13: arrayctor.cont:
-// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]]
-// CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8
-// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK13: cond.true:
-// CHECK13-NEXT: br label [[COND_END:%.*]]
-// CHECK13: cond.false:
-// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK13-NEXT: br label [[COND_END]]
-// CHECK13: cond.end:
-// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK13: omp.inner.for.cond:
-// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK13: omp.inner.for.cond.cleanup:
-// CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK13: omp.inner.for.body:
-// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
-// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
-// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]]
-// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64
-// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]]
-// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false)
-// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// CHECK13: omp.body.continue:
-// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK13: omp.inner.for.inc:
-// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
-// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK13: omp.inner.for.end:
-// CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK13: omp.loop.exit:
-// CHECK13-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
-// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]]
-// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2
-// CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK13: arraydestroy.body:
-// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]]
-// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]]
-// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK13: arraydestroy.done9:
-// CHECK13-NEXT: ret void
-//
-//
// CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev
// CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat {
// CHECK13-NEXT: entry:
@@ -2470,7 +1786,7 @@ int main() {
// CHECK15-NEXT: entry:
// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined)
+// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined)
// CHECK15-NEXT: ret void
//
//
@@ -2533,148 +1849,41 @@ int main() {
// CHECK15: omp.inner.for.cond.cleanup:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]])
-// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK15: omp.inner.for.end:
-// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK15: omp.loop.exit:
-// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]])
-// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]]
-// CHECK15-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2
-// CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK15: arraydestroy.body:
-// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
-// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]]
-// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]]
-// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK15: arraydestroy.done3:
-// CHECK15-NEXT: ret void
-//
-//
-// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
-// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
-// CHECK15-NEXT: entry:
-// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
-// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
-// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
-// CHECK15-NEXT: call void @_ZN1SIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]]
-// CHECK15-NEXT: ret void
-//
-//
-// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined
-// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] {
-// CHECK15-NEXT: entry:
-// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4
-// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4
-// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2
-// CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK15: arrayctor.loop:
-// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]]
-// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1
-// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK15: arrayctor.cont:
-// CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]]
-// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK15: cond.true:
-// CHECK15-NEXT: br label [[COND_END:%.*]]
-// CHECK15: cond.false:
-// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK15-NEXT: br label [[COND_END]]
-// CHECK15: cond.end:
-// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK15: omp.inner.for.cond:
// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK15: omp.inner.for.cond.cleanup:
-// CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]]
-// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4
-// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]]
+// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]]
+// CHECK15-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]]
// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false)
-// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4
-// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4
+// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
// CHECK15-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1
+// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK15-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK15: omp.loop.exit:
-// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
-// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]]
+// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]])
+// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]]
// CHECK15-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2
+// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2
// CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK15: arraydestroy.body:
-// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]]
// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]]
@@ -2683,6 +1892,16 @@ int main() {
// CHECK15-NEXT: ret void
//
//
+// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
+// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// CHECK15-NEXT: entry:
+// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CHECK15-NEXT: call void @_ZN1SIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]]
+// CHECK15-NEXT: ret void
+//
+//
// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev
// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
// CHECK15-NEXT: entry:
@@ -2698,7 +1917,7 @@ int main() {
// CHECK15-NEXT: entry:
// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined)
+// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined)
// CHECK15-NEXT: ret void
//
//
@@ -2764,148 +1983,38 @@ int main() {
// CHECK15: omp.inner.for.cond.cleanup:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]])
-// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK15: omp.inner.for.end:
-// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK15: omp.loop.exit:
-// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]])
-// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]]
-// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2
-// CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK15: arraydestroy.body:
-// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
-// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]]
-// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]]
-// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK15: arraydestroy.done5:
-// CHECK15-NEXT: ret void
-//
-//
-// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev
-// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
-// CHECK15-NEXT: entry:
-// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
-// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
-// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
-// CHECK15-NEXT: call void @_ZN1SIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]]
-// CHECK15-NEXT: ret void
-//
-//
-// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined
-// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] {
-// CHECK15-NEXT: entry:
-// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4
-// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4
-// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4
-// CHECK15-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4
-// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4
-// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2
-// CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK15: arrayctor.loop:
-// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]]
-// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1
-// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK15: arrayctor.cont:
-// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]]
-// CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4
-// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK15: cond.true:
-// CHECK15-NEXT: br label [[COND_END:%.*]]
-// CHECK15: cond.false:
-// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK15-NEXT: br label [[COND_END]]
-// CHECK15: cond.end:
-// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK15: omp.inner.for.cond:
// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK15: omp.inner.for.cond.cleanup:
-// CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK15: omp.inner.for.body:
-// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]]
+// CHECK15-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4
// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]]
-// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4
-// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]]
-// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false)
+// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]]
+// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false)
// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK15: omp.body.continue:
// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK15: omp.inner.for.inc:
-// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1
+// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1
// CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK15: omp.inner.for.end:
// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK15: omp.loop.exit:
-// CHECK15-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
+// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]]
// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2
+// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2
// CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK15: arraydestroy.body:
-// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]]
// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]]
@@ -2914,6 +2023,16 @@ int main() {
// CHECK15-NEXT: ret void
//
//
+// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev
+// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// CHECK15-NEXT: entry:
+// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CHECK15-NEXT: call void @_ZN1SIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]]
+// CHECK15-NEXT: ret void
+//
+//
// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev
// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
// CHECK15-NEXT: entry:
@@ -2972,7 +2091,7 @@ int main() {
// CHECK17-NEXT: entry:
// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined)
+// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined)
// CHECK17-NEXT: ret void
//
//
@@ -2993,6 +2112,7 @@ int main() {
// CHECK17-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8
@@ -3024,111 +2144,33 @@ int main() {
// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK17: omp.inner.for.body:
-// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK17: omp.inner.for.inc:
-// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK17: omp.inner.for.end:
-// CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK17: omp.loop.exit:
-// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
-// CHECK17-NEXT: ret void
-//
-//
-// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] {
-// CHECK17-NEXT: entry:
-// CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
-// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8
-// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK17-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK17-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8
-// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK17: cond.true:
-// CHECK17-NEXT: br label [[COND_END:%.*]]
-// CHECK17: cond.false:
-// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK17-NEXT: br label [[COND_END]]
-// CHECK17: cond.end:
-// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK17: omp.inner.for.cond:
// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK17: omp.inner.for.body:
-// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK17-NEXT: store i32 1, ptr [[G]], align 4
-// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK17-NEXT: store volatile i32 1, ptr [[TMP10]], align 4
+// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK17-NEXT: store volatile i32 1, ptr [[TMP8]], align 4
// CHECK17-NEXT: store i32 2, ptr [[SIVAR]], align 4
-// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0
-// CHECK17-NEXT: store ptr [[G]], ptr [[TMP11]], align 8
-// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1
-// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK17-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2
-// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8
+// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK17-NEXT: store ptr [[G]], ptr [[TMP9]], align 8
+// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK17-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
+// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2
+// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8
// CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]]
// CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK17: omp.body.continue:
// CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK17: omp.inner.for.inc:
-// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
+// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK17: omp.inner.for.end:
// CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK17: omp.loop.exit:
-// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK17-NEXT: ret void
//
diff --git a/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp
index 64f0dced135f8b7..3aa8e88f9217c24 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp
@@ -400,7 +400,7 @@ void foo() {
// CHECK: omp.inner.for.end:
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK: omp.loop.exit:
-// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP1]])
+// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
// CHECK-NEXT: ret void
//
//
@@ -434,7 +434,7 @@ void foo() {
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -470,7 +470,7 @@ void foo() {
// CHECK: omp.inner.for.end:
// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK: omp.loop.exit:
-// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
+// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
// CHECK-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp b/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp
index d0ae405306b8ed6..8394ddc286c1538 100644
--- a/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp
+++ b/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp
@@ -434,133 +434,35 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined, i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP22]])
-// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK1: omp.precond.end:
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
-// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
-// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
-// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
-// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]]
// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
+// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34
-// CHECK1-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK1-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
@@ -636,133 +538,35 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined, i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
-// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK1: omp.precond.end:
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
-// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
-// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
-// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
-// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]]
// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
+// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP19]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK1-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK1-SAME: () #[[ATTR3:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK1-NEXT: ret void
@@ -1028,128 +832,34 @@ int main (int argc, char **argv) {
// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined, i32 [[TMP15]], i32 [[TMP16]], ptr [[TMP0]], ptr [[TMP1]])
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK3: omp.inner.for.end:
-// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]])
-// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK3: omp.precond.end:
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
-// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
-// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
-// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
-// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4
-// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP18]]
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP16]]
// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1
// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
+// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34
-// CHECK3-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK3-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -1225,128 +935,34 @@ int main (int argc, char **argv) {
// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined, i32 [[TMP15]], i32 [[TMP16]], ptr [[TMP0]], ptr [[TMP1]])
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK3: omp.inner.for.end:
-// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]])
-// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK3: omp.precond.end:
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
-// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
-// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
-// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
-// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4
-// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP18]]
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP16]]
// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1
// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
+// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP19]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK3-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK3-SAME: () #[[ATTR3:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK3-NEXT: ret void
@@ -1442,7 +1058,7 @@ int main (int argc, char **argv) {
// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
// CHECK9-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK9: omp_offload.failed:
-// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72(i64 [[TMP4]], i64 [[TMP1]], ptr [[VLA]]) #[[ATTR4:[0-9]+]]
+// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72(i64 [[TMP4]], i64 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]]
// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK9: omp_offload.cont:
// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 0
@@ -1468,7 +1084,7 @@ int main (int argc, char **argv) {
//
//
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -1535,136 +1151,35 @@ int main (int argc, char **argv) {
// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]])
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK9: omp.inner.for.end:
-// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]])
-// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK9: omp.precond.end:
-// CHECK9-NEXT: ret void
-//
-//
-// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] {
-// CHECK9-NEXT: entry:
-// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8
-// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
-// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8
-// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
-// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0
-// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
-// CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK9: omp.precond.then:
-// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32
-// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32
-// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
-// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK9: cond.true:
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT: br label [[COND_END:%.*]]
-// CHECK9: cond.false:
-// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: br label [[COND_END]]
-// CHECK9: cond.end:
-// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ]
-// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK9: omp.inner.for.cond:
// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
-// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4
-// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64
+// CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
+// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]]
// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1
-// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1
+// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]])
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.end:
// CHECK9-NEXT: ret void
//
//
// CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK9-NEXT: ret void
@@ -1760,7 +1275,7 @@ int main (int argc, char **argv) {
// CHECK11-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
// CHECK11-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK11: omp_offload.failed:
-// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72(i32 [[TMP3]], i32 [[TMP0]], ptr [[VLA]]) #[[ATTR4:[0-9]+]]
+// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72(i32 [[TMP3]], i32 [[TMP0]], ptr [[VLA]]) #[[ATTR3:[0-9]+]]
// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK11: omp_offload.cont:
// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i32 0
@@ -1786,7 +1301,7 @@ int main (int argc, char **argv) {
//
//
// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
// CHECK11-NEXT: entry:
// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
@@ -1853,131 +1368,34 @@ int main (int argc, char **argv) {
// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]])
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK11: omp.inner.for.end:
-// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK11: omp.loop.exit:
-// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]])
-// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK11: omp.precond.end:
-// CHECK11-NEXT: ret void
-//
-//
-// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] {
-// CHECK11-NEXT: entry:
-// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
-// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
-// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4
-// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
-// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0
-// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
-// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK11: omp.precond.then:
-// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4
-// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4
-// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
-// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK11: cond.true:
-// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT: br label [[COND_END:%.*]]
-// CHECK11: cond.false:
-// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK11-NEXT: br label [[COND_END]]
-// CHECK11: cond.end:
-// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ]
-// CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK11: omp.inner.for.cond:
// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
-// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
+// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
-// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4
-// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]]
+// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]]
// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1
+// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1
// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
-// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
+// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]])
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
// CHECK11: omp.precond.end:
// CHECK11-NEXT: ret void
//
//
// CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK11-NEXT: entry:
// CHECK11-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK11-NEXT: ret void
@@ -2041,7 +1459,7 @@ int main (int argc, char **argv) {
// CHECK17-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK17-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK17: omp_offload.failed:
-// CHECK17-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108(ptr [[THIS1]]) #[[ATTR3:[0-9]+]]
+// CHECK17-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108(ptr [[THIS1]]) #[[ATTR2:[0-9]+]]
// CHECK17-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK17: omp_offload.cont:
// CHECK17-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
@@ -2061,7 +1479,7 @@ int main (int argc, char **argv) {
//
//
// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] {
// CHECK17-NEXT: entry:
// CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -2069,140 +1487,67 @@ int main (int argc, char **argv) {
// CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
-// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK17-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122
-// CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK17: cond.true:
-// CHECK17-NEXT: br label [[COND_END:%.*]]
-// CHECK17: cond.false:
-// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK17-NEXT: br label [[COND_END]]
-// CHECK17: cond.end:
-// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
-// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK17: omp.inner.for.cond:
-// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
-// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK17: omp.inner.for.body:
-// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]])
-// CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK17: omp.inner.for.inc:
-// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK17: omp.inner.for.end:
-// CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK17: omp.loop.exit:
-// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
-// CHECK17-NEXT: ret void
-//
-//
-// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] {
-// CHECK17-NEXT: entry:
-// CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK17-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4
-// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
+// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK17-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122
+// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122
// CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK17: cond.true:
// CHECK17-NEXT: br label [[COND_END:%.*]]
// CHECK17: cond.false:
-// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK17-NEXT: br label [[COND_END]]
// CHECK17: cond.end:
-// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
+// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK17: omp.inner.for.cond:
-// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
+// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK17: omp.inner.for.body:
-// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
+// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]]
// CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
// CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK17: omp.body.continue:
// CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK17: omp.inner.for.inc:
-// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
-// CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
+// CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK17: omp.inner.for.end:
// CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK17: omp.loop.exit:
-// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
// CHECK17-NEXT: ret void
//
//
// CHECK17-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK17-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK17-SAME: () #[[ATTR3:[0-9]+]] {
// CHECK17-NEXT: entry:
// CHECK17-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK17-NEXT: ret void
@@ -2266,7 +1611,7 @@ int main (int argc, char **argv) {
// CHECK19-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK19-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK19: omp_offload.failed:
-// CHECK19-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108(ptr [[THIS1]]) #[[ATTR3:[0-9]+]]
+// CHECK19-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108(ptr [[THIS1]]) #[[ATTR2:[0-9]+]]
// CHECK19-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK19: omp_offload.cont:
// CHECK19-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
@@ -2286,7 +1631,7 @@ int main (int argc, char **argv) {
//
//
// CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] {
// CHECK19-NEXT: entry:
// CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
@@ -2329,100 +1674,31 @@ int main (int argc, char **argv) {
// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK19: omp.inner.for.body:
-// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]])
-// CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK19: omp.inner.for.inc:
-// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK19: omp.inner.for.end:
-// CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK19: omp.loop.exit:
-// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
-// CHECK19-NEXT: ret void
-//
-//
-// CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] {
-// CHECK19-NEXT: entry:
-// CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
-// CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK19-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
-// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
-// CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK19-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4
-// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
-// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
-// CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122
-// CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK19: cond.true:
-// CHECK19-NEXT: br label [[COND_END:%.*]]
-// CHECK19: cond.false:
-// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK19-NEXT: br label [[COND_END]]
-// CHECK19: cond.end:
-// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK19: omp.inner.for.cond:
// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK19: omp.inner.for.body:
-// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]]
+// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]]
// CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
// CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK19: omp.body.continue:
// CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK19: omp.inner.for.inc:
-// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK19: omp.inner.for.end:
// CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK19: omp.loop.exit:
-// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
// CHECK19-NEXT: ret void
//
//
// CHECK19-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK19-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK19-SAME: () #[[ATTR3:[0-9]+]] {
// CHECK19-NEXT: entry:
// CHECK19-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK19-NEXT: ret void
@@ -2524,7 +1800,7 @@ int main (int argc, char **argv) {
// CHECK25-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
// CHECK25-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK25: omp_offload.failed:
-// CHECK25-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161(i64 [[TMP4]], i64 [[TMP1]], ptr [[VLA]]) #[[ATTR4:[0-9]+]]
+// CHECK25-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161(i64 [[TMP4]], i64 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]]
// CHECK25-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK25: omp_offload.cont:
// CHECK25-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
@@ -2552,7 +1828,7 @@ int main (int argc, char **argv) {
//
//
// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined
-// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
// CHECK25-NEXT: entry:
// CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -2619,136 +1895,35 @@ int main (int argc, char **argv) {
// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK25-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK25: omp.inner.for.body:
-// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK25-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK25-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]])
-// CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK25: omp.inner.for.inc:
-// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK25: omp.inner.for.end:
-// CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK25: omp.loop.exit:
-// CHECK25-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK25-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]])
-// CHECK25-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK25: omp.precond.end:
-// CHECK25-NEXT: ret void
-//
-//
-// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined
-// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] {
-// CHECK25-NEXT: entry:
-// CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8
-// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
-// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8
-// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
-// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8
-// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
-// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK25-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0
-// CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK25-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
-// CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK25: omp.precond.then:
-// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK25-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
-// CHECK25-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32
-// CHECK25-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK25-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32
-// CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK25-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK25-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK25-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
-// CHECK25-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK25: cond.true:
-// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK25-NEXT: br label [[COND_END:%.*]]
-// CHECK25: cond.false:
-// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK25-NEXT: br label [[COND_END]]
-// CHECK25: cond.end:
-// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ]
-// CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK25-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4
-// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK25: omp.inner.for.cond:
// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK25-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
-// CHECK25-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK25: omp.inner.for.body:
-// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
+// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK25-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4
-// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64
+// CHECK25-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
+// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]]
// CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
// CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK25: omp.body.continue:
// CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK25: omp.inner.for.inc:
-// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1
-// CHECK25-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK25-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1
+// CHECK25-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK25: omp.inner.for.end:
// CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK25: omp.loop.exit:
-// CHECK25-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK25-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
+// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]])
// CHECK25-NEXT: br label [[OMP_PRECOND_END]]
// CHECK25: omp.precond.end:
// CHECK25-NEXT: ret void
//
//
// CHECK25-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK25-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
+// CHECK25-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
// CHECK25-NEXT: entry:
// CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK25-NEXT: [[A:%.*]] = alloca [10 x i32], align 4
@@ -2824,14 +1999,14 @@ int main (int argc, char **argv) {
// CHECK25-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
// CHECK25-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK25: omp_offload.failed:
-// CHECK25-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150(i64 [[TMP1]], i64 [[TMP3]], ptr [[A]]) #[[ATTR4]]
+// CHECK25-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150(i64 [[TMP1]], i64 [[TMP3]], ptr [[A]]) #[[ATTR3]]
// CHECK25-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK25: omp_offload.cont:
// CHECK25-NEXT: ret i32 0
//
//
// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150
-// CHECK25-SAME: (i64 noundef [[TE:%.*]], i64 noundef [[TH:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
+// CHECK25-SAME: (i64 noundef [[TE:%.*]], i64 noundef [[TH:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
// CHECK25-NEXT: entry:
// CHECK25-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8
// CHECK25-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8
@@ -2849,7 +2024,7 @@ int main (int argc, char **argv) {
//
//
// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined
-// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
+// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
// CHECK25-NEXT: entry:
// CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -2892,104 +2067,31 @@ int main (int argc, char **argv) {
// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK25: omp.inner.for.body:
-// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK25-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK25-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]])
-// CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK25: omp.inner.for.inc:
-// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK25: omp.inner.for.end:
-// CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK25: omp.loop.exit:
-// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK25-NEXT: ret void
-//
-//
-// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined
-// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
-// CHECK25-NEXT: entry:
-// CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK25-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK25-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK25-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9
-// CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK25: cond.true:
-// CHECK25-NEXT: br label [[COND_END:%.*]]
-// CHECK25: cond.false:
-// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK25-NEXT: br label [[COND_END]]
-// CHECK25: cond.end:
-// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK25: omp.inner.for.cond:
// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK25-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK25: omp.inner.for.body:
-// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK25-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
+// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
// CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK25: omp.body.continue:
// CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK25: omp.inner.for.inc:
-// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
-// CHECK25-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK25-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
+// CHECK25-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK25: omp.inner.for.end:
// CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK25: omp.loop.exit:
-// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK25-NEXT: ret void
//
//
// CHECK25-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK25-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK25-SAME: () #[[ATTR6:[0-9]+]] {
// CHECK25-NEXT: entry:
// CHECK25-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK25-NEXT: ret void
@@ -3091,7 +2193,7 @@ int main (int argc, char **argv) {
// CHECK27-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
// CHECK27-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK27: omp_offload.failed:
-// CHECK27-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161(i32 [[TMP3]], i32 [[TMP0]], ptr [[VLA]]) #[[ATTR4:[0-9]+]]
+// CHECK27-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161(i32 [[TMP3]], i32 [[TMP0]], ptr [[VLA]]) #[[ATTR3:[0-9]+]]
// CHECK27-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK27: omp_offload.cont:
// CHECK27-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
@@ -3119,7 +2221,7 @@ int main (int argc, char **argv) {
//
//
// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined
-// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
// CHECK27-NEXT: entry:
// CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
@@ -3186,131 +2288,34 @@ int main (int argc, char **argv) {
// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK27: omp.inner.for.body:
-// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]])
-// CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK27: omp.inner.for.inc:
-// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK27: omp.inner.for.end:
-// CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK27: omp.loop.exit:
-// CHECK27-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]])
-// CHECK27-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK27: omp.precond.end:
-// CHECK27-NEXT: ret void
-//
-//
-// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined
-// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] {
-// CHECK27-NEXT: entry:
-// CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4
-// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
-// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
-// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4
-// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
-// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK27-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0
-// CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK27-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
-// CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK27: omp.precond.then:
-// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK27-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
-// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4
-// CHECK27-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4
-// CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK27-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
-// CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK27: cond.true:
-// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK27-NEXT: br label [[COND_END:%.*]]
-// CHECK27: cond.false:
-// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK27-NEXT: br label [[COND_END]]
-// CHECK27: cond.end:
-// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ]
-// CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK27-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4
-// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK27: omp.inner.for.cond:
// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
-// CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK27: omp.inner.for.body:
-// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
+// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK27-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
-// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4
-// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]]
+// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]]
// CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
// CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK27: omp.body.continue:
// CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK27: omp.inner.for.inc:
-// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1
+// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1
// CHECK27-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK27: omp.inner.for.end:
// CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK27: omp.loop.exit:
-// CHECK27-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]])
+// CHECK27-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
+// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]])
// CHECK27-NEXT: br label [[OMP_PRECOND_END]]
// CHECK27: omp.precond.end:
// CHECK27-NEXT: ret void
//
//
// CHECK27-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK27-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
+// CHECK27-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
// CHECK27-NEXT: entry:
// CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK27-NEXT: [[A:%.*]] = alloca [10 x i32], align 4
@@ -3386,14 +2391,14 @@ int main (int argc, char **argv) {
// CHECK27-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
// CHECK27-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK27: omp_offload.failed:
-// CHECK27-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150(i32 [[TMP1]], i32 [[TMP3]], ptr [[A]]) #[[ATTR4]]
+// CHECK27-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150(i32 [[TMP1]], i32 [[TMP3]], ptr [[A]]) #[[ATTR3]]
// CHECK27-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK27: omp_offload.cont:
// CHECK27-NEXT: ret i32 0
//
//
// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150
-// CHECK27-SAME: (i32 noundef [[TE:%.*]], i32 noundef [[TH:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
+// CHECK27-SAME: (i32 noundef [[TE:%.*]], i32 noundef [[TH:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
// CHECK27-NEXT: entry:
// CHECK27-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4
// CHECK27-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4
@@ -3411,7 +2416,7 @@ int main (int argc, char **argv) {
//
//
// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined
-// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
+// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
// CHECK27-NEXT: entry:
// CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
@@ -3454,99 +2459,30 @@ int main (int argc, char **argv) {
// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK27: omp.inner.for.body:
-// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]])
-// CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK27: omp.inner.for.inc:
-// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK27: omp.inner.for.end:
-// CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK27: omp.loop.exit:
-// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK27-NEXT: ret void
-//
-//
-// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined
-// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
-// CHECK27-NEXT: entry:
-// CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK27-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK27-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
-// CHECK27-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
-// CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9
-// CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK27: cond.true:
-// CHECK27-NEXT: br label [[COND_END:%.*]]
-// CHECK27: cond.false:
-// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK27-NEXT: br label [[COND_END]]
-// CHECK27: cond.end:
-// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK27: omp.inner.for.cond:
// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK27: omp.inner.for.body:
-// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK27-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]]
+// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]]
// CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4
// CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK27: omp.body.continue:
// CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK27: omp.inner.for.inc:
-// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK27-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK27: omp.inner.for.end:
// CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK27: omp.loop.exit:
-// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK27-NEXT: ret void
//
//
// CHECK27-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK27-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK27-SAME: () #[[ATTR6:[0-9]+]] {
// CHECK27-NEXT: entry:
// CHECK27-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK27-NEXT: ret void
diff --git a/clang/test/OpenMP/teams_generic_loop_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_codegen.cpp
index 2f3e70b1de58293..6d8eab13442c4a9 100644
--- a/clang/test/OpenMP/teams_generic_loop_codegen.cpp
+++ b/clang/test/OpenMP/teams_generic_loop_codegen.cpp
@@ -44,8 +44,8 @@ int foo() {
// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
// IR-NEXT: [[_TMP2:%.*]] = alloca i32, align 4
-// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// IR-NEXT: [[J3:%.*]] = alloca i32, align 4
@@ -69,304 +69,127 @@ int foo() {
// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// IR: omp.arrayinit.done:
-// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// IR-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
+// IR-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99
// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// IR: cond.true:
// IR-NEXT: br label [[COND_END:%.*]]
// IR: cond.false:
-// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: br label [[COND_END]]
// IR: cond.end:
// IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// IR: omp.inner.for.cond:
// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// IR-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// IR: omp.inner.for.body:
-// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// IR-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// IR-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
-// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @_Z3foov.omp_outlined.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[J3]], ptr [[SUM1]])
+// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10
+// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
+// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[DIV6:%.*]] = sdiv i32 [[TMP12]], 10
+// IR-NEXT: [[MUL7:%.*]] = mul nsw i32 [[DIV6]], 10
+// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL7]]
+// IR-NEXT: [[MUL8:%.*]] = mul nsw i32 [[SUB]], 1
+// IR-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]]
+// IR-NEXT: store i32 [[ADD9]], ptr [[J3]], align 4
+// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
+// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4
+// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64
+// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i64 0, i64 [[IDXPROM]]
+// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[J3]], align 4
+// IR-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP15]] to i64
+// IR-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM10]]
+// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
+// IR-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP16]], [[TMP13]]
+// IR-NEXT: store i32 [[ADD12]], ptr [[ARRAYIDX11]], align 4
+// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR: omp.body.continue:
// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// IR: omp.inner.for.inc:
-// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], 1
+// IR-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4
// IR-NEXT: br label [[OMP_INNER_FOR_COND]]
// IR: omp.inner.for.end:
// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// IR: omp.loop.exit:
-// IR-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]])
-// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// IR-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
-// IR-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
+// IR-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
+// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]])
+// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
+// IR-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// IR: .omp.lastprivate.then:
// IR-NEXT: store i32 10, ptr [[J3]], align 4
-// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3]], align 4
-// IR-NEXT: store i32 [[TMP20]], ptr [[TMP0]], align 4
+// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[J3]], align 4
+// IR-NEXT: store i32 [[TMP22]], ptr [[TMP0]], align 4
// IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// IR: .omp.lastprivate.done:
-// IR-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// IR-NEXT: store ptr [[SUM1]], ptr [[TMP21]], align 8
-// IR-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// IR-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// IR-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// IR-NEXT: store ptr [[SUM1]], ptr [[TMP23]], align 8
+// IR-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
+// IR-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// IR-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// IR-NEXT: ]
// IR: .omp.reduction.case1:
-// IR-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
-// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP25]]
-// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// IR-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
+// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP27]]
+// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// IR: omp.arraycpy.body:
// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4
-// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
-// IR-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4
-// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1
+// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4
+// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// IR-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP28]], [[TMP29]]
+// IR-NEXT: store i32 [[ADD15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4
+// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1
// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// IR-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP25]]
-// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]]
-// IR: omp.arraycpy.done10:
-// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
-// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// IR: .omp.reduction.case2:
-// IR-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
-// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP1]], [[TMP28]]
-// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]]
-// IR: omp.arraycpy.body12:
-// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ]
-// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ]
-// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4
-// IR-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP29]] monotonic, align 4
-// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1
-// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1
-// IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP28]]
-// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]]
+// IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP27]]
+// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]]
// IR: omp.arraycpy.done18:
-// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// IR: .omp.reduction.default:
-// IR-NEXT: ret void
-//
-//
-// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined
-// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] {
-// IR-NEXT: entry:
-// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// IR-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8
-// IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8
-// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// IR-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// IR-NEXT: [[J3:%.*]] = alloca i32, align 4
-// IR-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16
-// IR-NEXT: [[I:%.*]] = alloca i32, align 4
-// IR-NEXT: [[J5:%.*]] = alloca i32, align 4
-// IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// IR-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8
-// IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8
-// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8
-// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8
-// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// IR-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32
-// IR-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// IR-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32
-// IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// IR-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0
-// IR-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100
-// IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
-// IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
-// IR: omp.arrayinit.body:
-// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
-// IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
-// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
-// IR: omp.arrayinit.done:
-// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99
-// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// IR: cond.true:
-// IR-NEXT: br label [[COND_END:%.*]]
-// IR: cond.false:
-// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// IR-NEXT: br label [[COND_END]]
-// IR: cond.end:
-// IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ]
-// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// IR-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
-// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// IR: omp.inner.for.cond:
-// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
-// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]]
-// IR-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// IR: omp.inner.for.body:
-// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP12]], 10
-// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
-// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP14]], 10
-// IR-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10
-// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], [[MUL8]]
-// IR-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1
-// IR-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
-// IR-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64
-// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]]
-// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP17]] to i64
-// IR-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
-// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP18]], [[TMP15]]
-// IR-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// IR: omp.body.continue:
-// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// IR: omp.inner.for.inc:
-// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], 1
-// IR-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
-// IR: omp.inner.for.end:
-// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// IR: omp.loop.exit:
-// IR-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
-// IR-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// IR-NEXT: store ptr [[SUM4]], ptr [[TMP22]], align 8
-// IR-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// IR-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
-// IR-NEXT: ]
-// IR: .omp.reduction.case1:
-// IR-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
-// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP26]]
-// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
-// IR: omp.arraycpy.body:
-// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
-// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// IR-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
-// IR-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
-// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1
-// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// IR-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP26]]
-// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]]
-// IR: omp.arraycpy.done19:
-// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
+// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR: .omp.reduction.case2:
-// IR-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
-// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP1]], [[TMP29]]
-// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]]
-// IR: omp.arraycpy.body21:
-// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
-// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
-// IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4
-// IR-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP30]] monotonic, align 4
-// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1
-// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1
-// IR-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP29]]
-// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]]
-// IR: omp.arraycpy.done27:
+// IR-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
+// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[TMP1]], [[TMP30]]
+// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]]
+// IR: omp.arraycpy.body20:
+// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY20]] ]
+// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY20]] ]
+// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 4
+// IR-NEXT: [[TMP32:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 [[TMP31]] monotonic, align 4
+// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1
+// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1
+// IR-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP30]]
+// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY20]]
+// IR: omp.arraycpy.done26:
// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR: .omp.reduction.default:
-// IR-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// IR-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
-// IR-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
-// IR: .omp.lastprivate.then:
-// IR-NEXT: store i32 10, ptr [[J3]], align 4
-// IR-NEXT: [[TMP34:%.*]] = load i32, ptr [[J3]], align 4
-// IR-NEXT: store i32 [[TMP34]], ptr [[TMP0]], align 4
-// IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
-// IR: .omp.lastprivate.done:
-// IR-NEXT: ret void
-//
-//
-// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func
-// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
-// IR-NEXT: entry:
-// IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
-// IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
-// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100
-// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
-// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
-// IR: omp.arraycpy.body:
-// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// IR-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
-// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
-// IR: omp.arraycpy.done2:
// IR-NEXT: ret void
//
//
// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp.reduction.reduction_func
-// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
+// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// IR-NEXT: entry:
// IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
@@ -417,8 +240,8 @@ int foo() {
// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[_TMP2:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4
@@ -442,304 +265,127 @@ int foo() {
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// IR-PCH: omp.arrayinit.done:
-// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99
// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// IR-PCH: cond.true:
// IR-PCH-NEXT: br label [[COND_END:%.*]]
// IR-PCH: cond.false:
-// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: br label [[COND_END]]
// IR-PCH: cond.end:
// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// IR-PCH: omp.inner.for.cond:
// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// IR-PCH-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// IR-PCH-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// IR-PCH: omp.inner.for.body:
-// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// IR-PCH-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// IR-PCH-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
-// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @_Z3foov.omp_outlined.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[J3]], ptr [[SUM1]])
+// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10
+// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[DIV6:%.*]] = sdiv i32 [[TMP12]], 10
+// IR-PCH-NEXT: [[MUL7:%.*]] = mul nsw i32 [[DIV6]], 10
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL7]]
+// IR-PCH-NEXT: [[MUL8:%.*]] = mul nsw i32 [[SUB]], 1
+// IR-PCH-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]]
+// IR-PCH-NEXT: store i32 [[ADD9]], ptr [[J3]], align 4
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4
+// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i64 0, i64 [[IDXPROM]]
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[J3]], align 4
+// IR-PCH-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP15]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM10]]
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
+// IR-PCH-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP16]], [[TMP13]]
+// IR-PCH-NEXT: store i32 [[ADD12]], ptr [[ARRAYIDX11]], align 4
+// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-PCH: omp.body.continue:
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// IR-PCH: omp.inner.for.inc:
-// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], 1
+// IR-PCH-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4
// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
// IR-PCH: omp.inner.for.end:
// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// IR-PCH: omp.loop.exit:
-// IR-PCH-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]])
-// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// IR-PCH-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
-// IR-PCH-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
+// IR-PCH-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]])
+// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
+// IR-PCH-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// IR-PCH: .omp.lastprivate.then:
// IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4
-// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3]], align 4
-// IR-PCH-NEXT: store i32 [[TMP20]], ptr [[TMP0]], align 4
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[J3]], align 4
+// IR-PCH-NEXT: store i32 [[TMP22]], ptr [[TMP0]], align 4
// IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// IR-PCH: .omp.lastprivate.done:
-// IR-PCH-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP21]], align 8
-// IR-PCH-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
-// IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// IR-PCH-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// IR-PCH-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP23]], align 8
+// IR-PCH-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
+// IR-PCH-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// IR-PCH-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// IR-PCH-NEXT: ]
// IR-PCH: .omp.reduction.case1:
-// IR-PCH-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP25]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// IR-PCH-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP27]]
+// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// IR-PCH: omp.arraycpy.body:
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4
-// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// IR-PCH-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
-// IR-PCH-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4
+// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// IR-PCH-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP28]], [[TMP29]]
+// IR-PCH-NEXT: store i32 [[ADD15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1
// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP25]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]]
-// IR-PCH: omp.arraycpy.done10:
-// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
-// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// IR-PCH: .omp.reduction.case2:
-// IR-PCH-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP1]], [[TMP28]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]]
-// IR-PCH: omp.arraycpy.body12:
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ]
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ]
-// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4
-// IR-PCH-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP29]] monotonic, align 4
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP28]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]]
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP27]]
+// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]]
// IR-PCH: omp.arraycpy.done18:
-// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// IR-PCH: .omp.reduction.default:
-// IR-PCH-NEXT: ret void
-//
-//
-// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined
-// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] {
-// IR-PCH-NEXT: entry:
-// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8
-// IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8
-// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16
-// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[J5:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// IR-PCH-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8
-// IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8
-// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8
-// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8
-// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32
-// IR-PCH-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// IR-PCH-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32
-// IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// IR-PCH-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0
-// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100
-// IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
-// IR-PCH: omp.arrayinit.body:
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
-// IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
-// IR-PCH: omp.arrayinit.done:
-// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99
-// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// IR-PCH: cond.true:
-// IR-PCH-NEXT: br label [[COND_END:%.*]]
-// IR-PCH: cond.false:
-// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// IR-PCH-NEXT: br label [[COND_END]]
-// IR-PCH: cond.end:
-// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ]
-// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// IR-PCH-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
-// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// IR-PCH: omp.inner.for.cond:
-// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
-// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]]
-// IR-PCH-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// IR-PCH: omp.inner.for.body:
-// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP12]], 10
-// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
-// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP14]], 10
-// IR-PCH-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10
-// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], [[MUL8]]
-// IR-PCH-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1
-// IR-PCH-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
-// IR-PCH-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64
-// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]]
-// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP17]] to i64
-// IR-PCH-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
-// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP18]], [[TMP15]]
-// IR-PCH-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// IR-PCH: omp.body.continue:
-// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// IR-PCH: omp.inner.for.inc:
-// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], 1
-// IR-PCH-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
-// IR-PCH: omp.inner.for.end:
-// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// IR-PCH: omp.loop.exit:
-// IR-PCH-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
-// IR-PCH-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// IR-PCH-NEXT: store ptr [[SUM4]], ptr [[TMP22]], align 8
-// IR-PCH-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// IR-PCH-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
-// IR-PCH-NEXT: ]
-// IR-PCH: .omp.reduction.case1:
-// IR-PCH-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP26]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
-// IR-PCH: omp.arraycpy.body:
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
-// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// IR-PCH-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
-// IR-PCH-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP26]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]]
-// IR-PCH: omp.arraycpy.done19:
-// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
+// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR-PCH: .omp.reduction.case2:
-// IR-PCH-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP1]], [[TMP29]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]]
-// IR-PCH: omp.arraycpy.body21:
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
-// IR-PCH-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4
-// IR-PCH-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP30]] monotonic, align 4
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP29]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]]
-// IR-PCH: omp.arraycpy.done27:
+// IR-PCH-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[TMP1]], [[TMP30]]
+// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]]
+// IR-PCH: omp.arraycpy.body20:
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY20]] ]
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY20]] ]
+// IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 4
+// IR-PCH-NEXT: [[TMP32:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 [[TMP31]] monotonic, align 4
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP30]]
+// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY20]]
+// IR-PCH: omp.arraycpy.done26:
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR-PCH: .omp.reduction.default:
-// IR-PCH-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// IR-PCH-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
-// IR-PCH-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
-// IR-PCH: .omp.lastprivate.then:
-// IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4
-// IR-PCH-NEXT: [[TMP34:%.*]] = load i32, ptr [[J3]], align 4
-// IR-PCH-NEXT: store i32 [[TMP34]], ptr [[TMP0]], align 4
-// IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
-// IR-PCH: .omp.lastprivate.done:
-// IR-PCH-NEXT: ret void
-//
-//
-// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func
-// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
-// IR-PCH-NEXT: entry:
-// IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
-// IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
-// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
-// IR-PCH: omp.arraycpy.body:
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// IR-PCH-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
-// IR-PCH: omp.arraycpy.done2:
// IR-PCH-NEXT: ret void
//
//
// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp.reduction.reduction_func
-// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
+// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// IR-PCH-NEXT: entry:
// IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
diff --git a/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp
index ce71a4620facca0..35c07845c452f69 100644
--- a/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp
@@ -161,7 +161,7 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK1-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR3:[0-9]+]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
@@ -182,7 +182,7 @@ int main (int argc, char **argv) {
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -227,119 +227,44 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456
+// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456
-// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456
-// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
-// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456
+// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456
+// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
+// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64
-// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]]
-// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]]
+// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK1-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK1-SAME: () #[[ATTR3:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK1-NEXT: ret void
@@ -404,7 +329,7 @@ int main (int argc, char **argv) {
// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR3:[0-9]+]]
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3: omp_offload.cont:
// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
@@ -425,7 +350,7 @@ int main (int argc, char **argv) {
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
@@ -470,113 +395,42 @@ int main (int argc, char **argv) {
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]])
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK3: omp.inner.for.end:
-// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087
-// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456
+// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456
// CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456
-// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]]
+// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4
// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]]
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4
-// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP11]]
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
+// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]]
// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK3-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK3-SAME: () #[[ATTR3:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK3-NEXT: ret void
@@ -711,7 +565,7 @@ int main (int argc, char **argv) {
// CHECK9-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
// CHECK9-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK9: omp_offload.failed:
-// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR4:[0-9]+]]
+// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]]
// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK9: omp_offload.cont:
// CHECK9-NEXT: [[TMP51:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
@@ -744,7 +598,7 @@ int main (int argc, char **argv) {
//
//
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -834,185 +688,65 @@ int main (int argc, char **argv) {
// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]]
// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined, i64 [[TMP21]], i64 [[TMP22]], ptr [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]])
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP23]], [[TMP24]]
-// CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK9: omp.inner.for.end:
-// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP26]])
-// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK9: omp.precond.end:
-// CHECK9-NEXT: ret void
-//
-//
-// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] {
-// CHECK9-NEXT: entry:
-// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8
-// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8
-// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
-// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
-// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8
-// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8
-// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
-// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
-// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0
-// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0
-// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
-// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
-// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
-// CHECK9-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK9-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]]
-// CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK9: land.lhs.true:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]]
-// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
-// CHECK9: omp.precond.then:
-// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
-// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
-// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_LB]], align 8
-// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
-// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
-// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
-// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK9: cond.true:
-// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
-// CHECK9-NEXT: br label [[COND_END:%.*]]
-// CHECK9: cond.false:
-// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: br label [[COND_END]]
-// CHECK9: cond.end:
-// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ]
-// CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
-// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK9: omp.inner.for.cond:
// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]]
-// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP24]], 0
+// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0
// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
-// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP23]], [[CONV18]]
+// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]]
// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
-// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP27]], 0
+// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0
// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
-// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP26]], [[CONV25]]
-// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]]
+// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0
// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
-// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP25]], [[MUL31]]
+// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]]
// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
-// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I11]], align 4
-// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64
-// CHECK9-NEXT: [[TMP30:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]]
-// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP30]]
-// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4
-// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64
+// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4
+// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]]
+// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP28]]
+// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4
+// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64
// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]]
// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP32]], 1
+// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1
// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
+// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP32]])
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.end:
// CHECK9-NEXT: ret void
//
//
// CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1061,7 +795,7 @@ int main (int argc, char **argv) {
// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK9: omp_offload.failed:
-// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69(ptr [[A]]) #[[ATTR4]]
+// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69(ptr [[A]]) #[[ATTR3]]
// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK9: omp_offload.cont:
// CHECK9-NEXT: ret i32 0
@@ -1078,7 +812,7 @@ int main (int argc, char **argv) {
//
//
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR3]] {
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -1123,118 +857,43 @@ int main (int argc, char **argv) {
// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]])
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK9: omp.inner.for.end:
-// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK9-NEXT: ret void
-//
-//
-// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR3]] {
-// CHECK9-NEXT: entry:
-// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19
-// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK9: cond.true:
-// CHECK9-NEXT: br label [[COND_END:%.*]]
-// CHECK9: cond.false:
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: br label [[COND_END]]
-// CHECK9: cond.end:
-// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK9: omp.inner.for.cond:
// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2
+// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2
// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2
-// CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2
-// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
-// CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
-// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2
+// CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2
+// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
+// CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
+// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
+// CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4
+// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4
-// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64
-// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]]
-// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4
+// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
+// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64
+// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]]
+// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK9-NEXT: ret void
//
//
// CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK9-NEXT: ret void
@@ -1368,7 +1027,7 @@ int main (int argc, char **argv) {
// CHECK11-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0
// CHECK11-NEXT: br i1 [[TMP49]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK11: omp_offload.failed:
-// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR4:[0-9]+]]
+// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]]
// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK11: omp_offload.cont:
// CHECK11-NEXT: [[TMP50:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
@@ -1401,7 +1060,7 @@ int main (int argc, char **argv) {
//
//
// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
// CHECK11-NEXT: entry:
// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
@@ -1491,187 +1150,63 @@ int main (int argc, char **argv) {
// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]]
// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32
-// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK11-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
-// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined, i32 [[TMP22]], i32 [[TMP24]], ptr [[TMP0]], ptr [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]])
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]]
-// CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK11: omp.inner.for.end:
-// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK11: omp.loop.exit:
-// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP28]])
-// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK11: omp.precond.end:
-// CHECK11-NEXT: ret void
-//
-//
-// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] {
-// CHECK11-NEXT: entry:
-// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
-// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
-// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
-// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
-// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
-// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
-// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4
-// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
-// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
-// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4
-// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4
-// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
-// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
-// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0
-// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0
-// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
-// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
-// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
-// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
-// CHECK11-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK11-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]]
-// CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK11: land.lhs.true:
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]]
-// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
-// CHECK11: omp.precond.then:
-// CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
-// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
-// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8
-// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP12]] to i64
-// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP13]] to i64
-// CHECK11-NEXT: store i64 [[CONV11]], ptr [[DOTOMP_LB]], align 8
-// CHECK11-NEXT: store i64 [[CONV12]], ptr [[DOTOMP_UB]], align 8
-// CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
-// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
-// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
-// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
-// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK11: cond.true:
-// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
-// CHECK11-NEXT: br label [[COND_END:%.*]]
-// CHECK11: cond.false:
-// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK11-NEXT: br label [[COND_END]]
-// CHECK11: cond.end:
-// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ]
-// CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
-// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
-// CHECK11-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK11: omp.inner.for.cond:
// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]]
-// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK11: omp.inner.for.body:
+// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0
+// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
+// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
+// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]]
+// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
+// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
+// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
+// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP24]], 0
-// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1
-// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]]
-// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64
-// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP23]], [[CONV20]]
-// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1
-// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]]
-// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4
-// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP27]], 0
-// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
-// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]]
-// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64
-// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP26]], [[CONV27]]
-// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP28]], 0
-// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
-// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]]
-// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64
-// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]]
-// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP25]], [[MUL33]]
-// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1
-// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]]
-// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
-// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4
-// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I13]], align 4
-// CHECK11-NEXT: [[TMP30:%.*]] = mul nsw i32 [[TMP29]], [[TMP3]]
-// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP30]]
-// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[J14]], align 4
-// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP31]]
-// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4
+// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0
+// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
+// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
+// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
+// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]]
+// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0
+// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
+// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
+// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
+// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]]
+// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
+// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
+// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
+// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
+// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4
+// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP3]]
+// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP28]]
+// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4
+// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP29]]
+// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP32]], 1
-// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8
+// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP30]], 1
+// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
-// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
+// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP32]])
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
// CHECK11: omp.precond.end:
// CHECK11-NEXT: ret void
//
//
// CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
// CHECK11-NEXT: entry:
// CHECK11-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK11-NEXT: [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1720,7 +1255,7 @@ int main (int argc, char **argv) {
// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK11-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK11: omp_offload.failed:
-// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69(ptr [[A]]) #[[ATTR4]]
+// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69(ptr [[A]]) #[[ATTR3]]
// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK11: omp_offload.cont:
// CHECK11-NEXT: ret i32 0
@@ -1737,7 +1272,7 @@ int main (int argc, char **argv) {
//
//
// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR3]] {
+// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] {
// CHECK11-NEXT: entry:
// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
@@ -1782,112 +1317,41 @@ int main (int argc, char **argv) {
// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]])
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK11: omp.inner.for.end:
-// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK11: omp.loop.exit:
-// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK11-NEXT: ret void
-//
-//
-// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR3]] {
-// CHECK11-NEXT: entry:
-// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4
-// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
-// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
-// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19
-// CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK11: cond.true:
-// CHECK11-NEXT: br label [[COND_END:%.*]]
-// CHECK11: cond.false:
-// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK11-NEXT: br label [[COND_END]]
-// CHECK11: cond.end:
-// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK11: omp.inner.for.cond:
// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2
+// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2
+// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2
// CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2
-// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]]
+// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
// CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4
-// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]]
-// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4
-// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]]
+// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP11]]
+// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
+// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]]
// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4
// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK11: omp.body.continue:
// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1
+// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
-// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK11-NEXT: ret void
//
//
// CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
// CHECK11-NEXT: entry:
// CHECK11-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK11-NEXT: ret void
diff --git a/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp
index b86f1440ec9c9bb..006da82e4b0ea12 100644
--- a/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp
+++ b/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp
@@ -307,7 +307,7 @@ int main() {
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -365,154 +365,53 @@ int main() {
// CHECK1: omp.inner.for.cond.cleanup:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]])
-// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2
-// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]]
-// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK1: arraydestroy.done3:
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4
-// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4
-// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2
-// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK1: arrayctor.loop:
-// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1
-// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK1: arrayctor.cont:
-// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK1: omp.inner.for.cond.cleanup:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64
-// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]]
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false)
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4
+// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64
+// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]]
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false)
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4
+// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
+// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]])
// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2
+// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2
// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]]
-// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK1: arraydestroy.done8:
+// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]]
+// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK1: arraydestroy.done7:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
-// CHECK1-SAME: () #[[ATTR7:[0-9]+]] comdat {
+// CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4
@@ -612,7 +511,7 @@ int main() {
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -673,149 +572,45 @@ int main() {
// CHECK1: omp.inner.for.cond.cleanup:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP14]])
-// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2
-// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]]
-// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK1: arraydestroy.done5:
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4
-// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4
-// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2
-// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK1: arrayctor.loop:
-// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1
-// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK1: arrayctor.cont:
-// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
-// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK1: omp.inner.for.cond.cleanup:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64
-// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]]
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false)
+// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64
+// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]]
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false)
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP14]])
// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2
+// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2
// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]]
-// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK1: arraydestroy.done9:
+// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]]
+// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK1: arraydestroy.done8:
// CHECK1-NEXT: ret void
//
//
@@ -1047,7 +842,7 @@ int main() {
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
@@ -1105,138 +900,41 @@ int main() {
// CHECK3: omp.inner.for.cond.cleanup:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]])
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK3: omp.inner.for.end:
-// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]])
-// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2
-// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
-// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]]
-// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK3: arraydestroy.done3:
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4
-// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4
-// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2
-// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK3: arrayctor.loop:
-// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1
-// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK3: arrayctor.cont:
-// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
-// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK3: omp.inner.for.cond.cleanup:
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]]
-// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]]
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]]
+// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]]
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false)
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
+// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]])
// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
// CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2
// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]]
@@ -1246,7 +944,7 @@ int main() {
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
-// CHECK3-SAME: () #[[ATTR7:[0-9]+]] comdat {
+// CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4
@@ -1346,7 +1044,7 @@ int main() {
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
@@ -1407,138 +1105,38 @@ int main() {
// CHECK3: omp.inner.for.cond.cleanup:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]])
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK3: omp.inner.for.end:
-// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]])
-// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2
-// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
-// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]]
-// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK3: arraydestroy.done5:
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4
-// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4
-// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2
-// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK3: arrayctor.loop:
-// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1
-// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK3: arrayctor.cont:
-// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
-// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK3: omp.inner.for.cond.cleanup:
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]]
+// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]]
-// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]]
-// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false)
+// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]]
+// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false)
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1
// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
+// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP14]])
// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2
// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]]
@@ -1745,7 +1343,7 @@ int main() {
//
//
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR6:[0-9]+]] {
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -1761,6 +1359,7 @@ int main() {
// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8
@@ -1792,112 +1391,34 @@ int main() {
// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK9: omp.inner.for.end:
-// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
-// CHECK9-NEXT: ret void
-//
-//
-// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR6]] {
-// CHECK9-NEXT: entry:
-// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
-// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8
-// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8
-// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK9: cond.true:
-// CHECK9-NEXT: br label [[COND_END:%.*]]
-// CHECK9: cond.false:
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: br label [[COND_END]]
-// CHECK9: cond.end:
-// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK9: omp.inner.for.cond:
// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK9-NEXT: store i32 1, ptr [[G]], align 4
-// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK9-NEXT: store volatile i32 1, ptr [[TMP10]], align 4
+// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4
// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4
-// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0
-// CHECK9-NEXT: store ptr [[G]], ptr [[TMP11]], align 8
-// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1
-// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK9-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2
-// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8
+// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK9-NEXT: store ptr [[G]], ptr [[TMP9]], align 8
+// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
+// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2
+// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8
// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]])
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
// CHECK9-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp
index 3cbb5cfc5effab5..c0a3a8e69be0f82 100644
--- a/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp
@@ -145,7 +145,7 @@ int main() {
// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68(i64 [[TMP1]]) #[[ATTR3:[0-9]+]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68(i64 [[TMP1]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v()
@@ -162,7 +162,7 @@ int main() {
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -208,165 +208,50 @@ int main() {
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
-// CHECK1-NEXT: ]
-// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
-// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK1: .omp.reduction.default:
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[SIVAR2]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
-// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
+// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func
-// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
-// CHECK1-NEXT: ret void
-//
-//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func
-// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
@@ -386,7 +271,7 @@ int main() {
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
-// CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat {
+// CHECK1-SAME: () #[[ATTR5:[0-9]+]] comdat {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
@@ -439,7 +324,7 @@ int main() {
// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i64 [[TMP1]]) #[[ATTR3]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i64 [[TMP1]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK1: omp_offload.cont:
// CHECK1-NEXT: ret i32 0
@@ -455,7 +340,7 @@ int main() {
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR2]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -501,165 +386,50 @@ int main() {
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
-// CHECK1-NEXT: ]
-// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
-// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK1: .omp.reduction.default:
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR2]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4
-// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
-// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR2]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
+// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func
-// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
-// CHECK1-NEXT: ret void
-//
-//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func
-// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
@@ -679,7 +449,7 @@ int main() {
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK1-SAME: () #[[ATTR8:[0-9]+]] {
+// CHECK1-SAME: () #[[ATTR7:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK1-NEXT: ret void
@@ -737,7 +507,7 @@ int main() {
// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68(i32 [[TMP1]]) #[[ATTR3:[0-9]+]]
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68(i32 [[TMP1]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3: omp_offload.cont:
// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v()
@@ -754,7 +524,7 @@ int main() {
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
@@ -800,15 +570,21 @@ int main() {
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]])
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
+// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -818,14 +594,14 @@ int main() {
// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
@@ -836,125 +612,8 @@ int main() {
// CHECK3-NEXT: ret void
//
//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
-// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// CHECK3: omp.body.continue:
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK3: omp.inner.for.end:
-// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
-// CHECK3-NEXT: ]
-// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
-// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK3: .omp.reduction.default:
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func
-// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
-// CHECK3-NEXT: ret void
-//
-//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func
-// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
@@ -974,7 +633,7 @@ int main() {
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
-// CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat {
+// CHECK3-SAME: () #[[ATTR5:[0-9]+]] comdat {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
@@ -1027,7 +686,7 @@ int main() {
// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32 [[TMP1]]) #[[ATTR3]]
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32 [[TMP1]]) #[[ATTR2]]
// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK3: omp_offload.cont:
// CHECK3-NEXT: ret i32 0
@@ -1043,7 +702,7 @@ int main() {
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR2]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
@@ -1089,15 +748,21 @@ int main() {
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]])
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4
+// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
+// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -1107,14 +772,14 @@ int main() {
// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
@@ -1125,125 +790,8 @@ int main() {
// CHECK3-NEXT: ret void
//
//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR2]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
-// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// CHECK3: omp.body.continue:
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK3: omp.inner.for.end:
-// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
-// CHECK3-NEXT: ]
-// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
-// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK3: .omp.reduction.default:
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func
-// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
-// CHECK3-NEXT: ret void
-//
-//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func
-// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
@@ -1263,7 +811,7 @@ int main() {
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK3-SAME: () #[[ATTR8:[0-9]+]] {
+// CHECK3-SAME: () #[[ATTR7:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK3-NEXT: ret void
@@ -1289,7 +837,7 @@ int main() {
//
//
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -1302,6 +850,7 @@ int main() {
// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
// CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
@@ -1335,169 +884,53 @@ int main() {
// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]])
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK9: omp.inner.for.end:
-// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
-// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8
-// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK9-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
-// CHECK9-NEXT: ]
-// CHECK9: .omp.reduction.case1:
-// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK9-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
-// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
-// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK9: .omp.reduction.case2:
-// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK9-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
-// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK9: .omp.reduction.default:
-// CHECK9-NEXT: ret void
-//
-//
-// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3]] {
-// CHECK9-NEXT: entry:
-// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
-// CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8
-// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8
-// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: store i32 0, ptr [[SIVAR2]], align 4
-// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
-// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK9: cond.true:
-// CHECK9-NEXT: br label [[COND_END:%.*]]
-// CHECK9: cond.false:
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: br label [[COND_END]]
-// CHECK9: cond.end:
-// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK9: omp.inner.for.cond:
// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
-// CHECK9-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4
-// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0
-// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4
+// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8
// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]])
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
-// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP15]], align 8
-// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP13]], align 8
+// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK9-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK9-NEXT: ]
// CHECK9: .omp.reduction.case1:
-// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK9-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4
-// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
+// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
+// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK9: .omp.reduction.case2:
-// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4
+// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK9-NEXT: [[TMP18:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP17]] monotonic, align 4
// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK9: .omp.reduction.default:
// CHECK9-NEXT: ret void
//
//
-// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined.omp.reduction.reduction_func
-// CHECK9-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] {
-// CHECK9-NEXT: entry:
-// CHECK9-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
-// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
-// CHECK9-NEXT: ret void
-//
-//
// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp.reduction.reduction_func
-// CHECK9-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR5]] {
+// CHECK9-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
@@ -1517,7 +950,7 @@ int main() {
//
//
// CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
// CHECK9-NEXT: entry:
// CHECK9-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK9-NEXT: ret void
More information about the cfe-commits
mailing list