[clang] 289a0f2 - [OpenMP] Remove SPMD specific handling during globalization
Johannes Doerfert via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 26 14:38:53 PDT 2023
Author: Johannes Doerfert
Date: 2023-10-26T14:38:23-07:00
New Revision: 289a0f255dbe0a3cc7b31d9bd5992f648df3fb9d
URL: https://github.com/llvm/llvm-project/commit/289a0f255dbe0a3cc7b31d9bd5992f648df3fb9d
DIFF: https://github.com/llvm/llvm-project/commit/289a0f255dbe0a3cc7b31d9bd5992f648df3fb9d.diff
LOG: [OpenMP] Remove SPMD specific handling during globalization
Globalization and SPMD are different things that used to be conflated.
Some leftover crossover interactions remain, trying to remove them now.
Added:
Modified:
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 56de8d39fe856d0..0604ece7c287897 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1046,10 +1046,8 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
}
void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
- SourceLocation Loc,
- bool WithSPMDCheck) {
- if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic &&
- getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
+ SourceLocation Loc) {
+ if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
return;
CGBuilderTy &Bld = CGF.Builder;
@@ -1158,10 +1156,8 @@ void CGOpenMPRuntimeGPU::getKmpcFreeShared(
{AddrSizePair.first, AddrSizePair.second});
}
-void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF,
- bool WithSPMDCheck) {
- if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic &&
- getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
+void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF) {
+ if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
return;
const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
@@ -3350,13 +3346,13 @@ void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
Data.insert(std::make_pair(VD, MappedVarData()));
}
if (!NeedToDelayGlobalization) {
- emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
+ emitGenericVarsProlog(CGF, D->getBeginLoc());
struct GlobalizationScope final : EHScopeStack::Cleanup {
GlobalizationScope() = default;
void Emit(CodeGenFunction &CGF, Flags flags) override {
static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime())
- .emitGenericVarsEpilog(CGF, /*WithSPMDCheck=*/true);
+ .emitGenericVarsEpilog(CGF);
}
};
CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
index 86871dfce418fde..c4501a1a2a496b0 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -68,11 +68,10 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
bool IsSPMD);
/// Helper for generic variables globalization prolog.
- void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc,
- bool WithSPMDCheck = false);
+ void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc);
/// Helper for generic variables globalization epilog.
- void emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck = false);
+ void emitGenericVarsEpilog(CodeGenFunction &CGF);
//
// Base class overrides.
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
index ce8b2d6b4622e1f..df43851378bcbb0 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -98,7 +98,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -116,13 +116,13 @@ int bar(int n){
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8
@@ -132,21 +132,20 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -154,11 +153,11 @@ int bar(int n){
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
@@ -171,10 +170,10 @@ int bar(int n){
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
-// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
+// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
@@ -207,29 +206,29 @@ int bar(int n){
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
-// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]]
-// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]]
-// CHECK1: cond.true11:
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: br label [[COND_END13:%.*]]
-// CHECK1: cond.false12:
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
+// CHECK1: cond.true10:
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: br label [[COND_END12:%.*]]
+// CHECK1: cond.false11:
// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END13]]
-// CHECK1: cond.end13:
-// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ]
-// CHECK1-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: br label [[COND_END12]]
+// CHECK1: cond.end12:
+// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ]
+// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
@@ -249,7 +248,6 @@ int bar(int n){
// CHECK1: .omp.lastprivate.done:
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
-// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i64 4)
// CHECK1-NEXT: ret void
//
//
@@ -386,7 +384,7 @@ int bar(int n){
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
-// CHECK1-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR6:[0-9]+]] {
+// CHECK1-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
@@ -406,7 +404,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -652,7 +650,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -843,7 +841,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -1069,7 +1067,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -1382,7 +1380,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -1648,7 +1646,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1666,13 +1664,13 @@ int bar(int n){
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8
@@ -1682,21 +1680,20 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -1704,11 +1701,11 @@ int bar(int n){
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
@@ -1721,10 +1718,10 @@ int bar(int n){
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
-// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
+// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
@@ -1757,29 +1754,29 @@ int bar(int n){
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
+// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
-// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
+// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]]
-// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]]
-// CHECK2: cond.true11:
-// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: br label [[COND_END13:%.*]]
-// CHECK2: cond.false12:
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]]
+// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
+// CHECK2: cond.true10:
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: br label [[COND_END12:%.*]]
+// CHECK2: cond.false11:
// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: br label [[COND_END13]]
-// CHECK2: cond.end13:
-// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ]
-// CHECK2-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: br label [[COND_END12]]
+// CHECK2: cond.end12:
+// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ]
+// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
@@ -1799,7 +1796,6 @@ int bar(int n){
// CHECK2: .omp.lastprivate.done:
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
-// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i64 4)
// CHECK2-NEXT: ret void
//
//
@@ -1936,7 +1932,7 @@ int bar(int n){
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
-// CHECK2-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR6:[0-9]+]] {
+// CHECK2-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
@@ -1956,7 +1952,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -2202,7 +2198,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -2393,7 +2389,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -2619,7 +2615,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -2927,7 +2923,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -3193,7 +3189,7 @@ int bar(int n){
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -3211,13 +3207,13 @@ int bar(int n){
// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4
@@ -3227,21 +3223,20 @@ int bar(int n){
// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: omp.precond.then:
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -3249,11 +3244,11 @@ int bar(int n){
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
@@ -3266,10 +3261,10 @@ int bar(int n){
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK3-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
-// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
+// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
@@ -3300,29 +3295,29 @@ int bar(int n){
// CHECK3: omp.inner.for.inc:
// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
+// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
+// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
+// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]]
-// CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]]
-// CHECK3: cond.true11:
-// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK3-NEXT: br label [[COND_END13:%.*]]
-// CHECK3: cond.false12:
+// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]]
+// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
+// CHECK3: cond.true10:
+// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: br label [[COND_END12:%.*]]
+// CHECK3: cond.false11:
// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END13]]
-// CHECK3: cond.end13:
-// CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ]
-// CHECK3-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: br label [[COND_END12]]
+// CHECK3: cond.end12:
+// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ]
+// CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
@@ -3342,7 +3337,6 @@ int bar(int n){
// CHECK3: .omp.lastprivate.done:
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
-// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i32 4)
// CHECK3-NEXT: ret void
//
//
@@ -3474,7 +3468,7 @@ int bar(int n){
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
-// CHECK3-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR6:[0-9]+]] {
+// CHECK3-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
@@ -3494,7 +3488,7 @@ int bar(int n){
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -3734,7 +3728,7 @@ int bar(int n){
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -3919,7 +3913,7 @@ int bar(int n){
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -4138,7 +4132,7 @@ int bar(int n){
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -4454,7 +4448,7 @@ int bar(int n){
// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]]
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
index dbc2a6c6668a27f..fdd4457739510d0 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -86,7 +86,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -104,13 +104,13 @@ int bar(int n){
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8
@@ -120,21 +120,20 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -142,11 +141,11 @@ int bar(int n){
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
@@ -159,10 +158,10 @@ int bar(int n){
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
-// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
+// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
@@ -195,29 +194,29 @@ int bar(int n){
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
-// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]]
-// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]]
-// CHECK1: cond.true11:
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK1-NEXT: br label [[COND_END13:%.*]]
-// CHECK1: cond.false12:
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
+// CHECK1: cond.true10:
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK1-NEXT: br label [[COND_END12:%.*]]
+// CHECK1: cond.false11:
// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK1-NEXT: br label [[COND_END13]]
-// CHECK1: cond.end13:
-// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ]
-// CHECK1-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK1-NEXT: br label [[COND_END12]]
+// CHECK1: cond.end12:
+// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ]
+// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
@@ -232,11 +231,11 @@ int bar(int n){
// CHECK1-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP48]], 0
-// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1
-// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD17]], ptr [[I4]], align 4
+// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1
+// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD16]], ptr [[I3]], align 4
// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
@@ -249,7 +248,6 @@ int bar(int n){
// CHECK1: .omp.lastprivate.done:
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
-// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i64 4)
// CHECK1-NEXT: ret void
//
//
@@ -398,7 +396,7 @@ int bar(int n){
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32
-// CHECK1-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR6:[0-9]+]] {
+// CHECK1-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
@@ -418,7 +416,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -688,7 +686,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -893,7 +891,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -1141,7 +1139,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1159,13 +1157,13 @@ int bar(int n){
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4
@@ -1175,21 +1173,20 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -1197,11 +1194,11 @@ int bar(int n){
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
@@ -1214,10 +1211,10 @@ int bar(int n){
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]]
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
-// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
+// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
@@ -1248,29 +1245,29 @@ int bar(int n){
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
+// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]]
-// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]]
-// CHECK2: cond.true11:
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK2-NEXT: br label [[COND_END13:%.*]]
-// CHECK2: cond.false12:
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]]
+// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
+// CHECK2: cond.true10:
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK2-NEXT: br label [[COND_END12:%.*]]
+// CHECK2: cond.false11:
// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK2-NEXT: br label [[COND_END13]]
-// CHECK2: cond.end13:
-// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ]
-// CHECK2-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK2-NEXT: br label [[COND_END12]]
+// CHECK2: cond.end12:
+// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ]
+// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
@@ -1285,11 +1282,11 @@ int bar(int n){
// CHECK2-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK2: .omp.final.then:
// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP46]], 0
-// CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
-// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1
-// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD17]], ptr [[I4]], align 4
+// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1
+// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK2-NEXT: store i32 [[ADD16]], ptr [[I3]], align 4
// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK2: .omp.final.done:
// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
@@ -1302,7 +1299,6 @@ int bar(int n){
// CHECK2: .omp.lastprivate.done:
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
-// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i32 4)
// CHECK2-NEXT: ret void
//
//
@@ -1446,7 +1442,7 @@ int bar(int n){
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32
-// CHECK2-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR6:[0-9]+]] {
+// CHECK2-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
@@ -1466,7 +1462,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1730,7 +1726,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1929,7 +1925,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
index 3173c075d7047b4..be814ed13349cb1 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
@@ -94,7 +94,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8
// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
+// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
@@ -112,34 +112,33 @@ int bar(int n){
// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[I4:%.*]] = alloca i32, align 4
+// CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK45-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK45-64-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK45-64-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-64-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK45-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK45-64-NEXT: store i32 0, ptr [[I]], align 4
// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK45-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK45-64: omp.precond.then:
// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
// CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -149,11 +148,11 @@ int bar(int n){
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-64: omp.dispatch.cond:
// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK45-64-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK45-64-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK45-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-64: cond.true:
-// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK45-64-NEXT: br label [[COND_END:%.*]]
// CHECK45-64: cond.false:
// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
@@ -165,45 +164,45 @@ int bar(int n){
// CHECK45-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
-// CHECK45-64-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
+// CHECK45-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
+// CHECK45-64-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-64: omp.dispatch.body:
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-64: omp.inner.for.cond:
// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]]
// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK45-64-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
-// CHECK45-64-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK45-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
+// CHECK45-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-64: omp.inner.for.body:
// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK45-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-64-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-64: omp.body.continue:
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-64: omp.inner.for.inc:
// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK45-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK45-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK45-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
+// CHECK45-64-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
// CHECK45-64: omp.inner.for.end:
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-64: omp.dispatch.inc:
// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK45-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4
+// CHECK45-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK45-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK45-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-64: omp.dispatch.end:
// CHECK45-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
@@ -214,11 +213,11 @@ int bar(int n){
// CHECK45-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-64: .omp.final.then:
// CHECK45-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-64-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0
-// CHECK45-64-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
-// CHECK45-64-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1
-// CHECK45-64-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]]
-// CHECK45-64-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4
+// CHECK45-64-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK45-64-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
+// CHECK45-64-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
+// CHECK45-64-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK45-64-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
// CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-64: .omp.final.done:
// CHECK45-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
@@ -231,12 +230,11 @@ int bar(int n){
// CHECK45-64: .omp.lastprivate.done:
// CHECK45-64-NEXT: br label [[OMP_PRECOND_END]]
// CHECK45-64: omp.precond.end:
-// CHECK45-64-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i64 4)
// CHECK45-64-NEXT: ret void
//
//
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
-// CHECK45-64-SAME: (i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK45-64-SAME: (i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK45-64-NEXT: entry:
// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
@@ -256,7 +254,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
@@ -403,7 +401,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
@@ -525,7 +523,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
+// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
@@ -677,7 +675,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
+// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
@@ -695,34 +693,33 @@ int bar(int n){
// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[I4:%.*]] = alloca i32, align 4
+// CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK45-32-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK45-32-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-32-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK45-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK45-32-NEXT: store i32 0, ptr [[I]], align 4
// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK45-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK45-32: omp.precond.then:
// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
// CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -732,11 +729,11 @@ int bar(int n){
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-32: omp.dispatch.cond:
// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK45-32-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK45-32-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK45-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-32: cond.true:
-// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK45-32-NEXT: br label [[COND_END:%.*]]
// CHECK45-32: cond.false:
// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
@@ -748,44 +745,44 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
-// CHECK45-32-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
+// CHECK45-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
+// CHECK45-32-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-32: omp.dispatch.body:
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32: omp.inner.for.cond:
// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]]
// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK45-32-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
-// CHECK45-32-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK45-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
+// CHECK45-32-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32: omp.inner.for.body:
// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]]
// CHECK45-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-32-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32: omp.body.continue:
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32: omp.inner.for.inc:
// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK45-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK45-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK45-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
+// CHECK45-32-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
// CHECK45-32: omp.inner.for.end:
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-32: omp.dispatch.inc:
// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK45-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4
+// CHECK45-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK45-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK45-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-32: omp.dispatch.end:
// CHECK45-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
@@ -796,11 +793,11 @@ int bar(int n){
// CHECK45-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-32: .omp.final.then:
// CHECK45-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-32-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0
-// CHECK45-32-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
-// CHECK45-32-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1
-// CHECK45-32-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]]
-// CHECK45-32-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4
+// CHECK45-32-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK45-32-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
+// CHECK45-32-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
+// CHECK45-32-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK45-32-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
// CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-32: .omp.final.done:
// CHECK45-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
@@ -813,12 +810,11 @@ int bar(int n){
// CHECK45-32: .omp.lastprivate.done:
// CHECK45-32-NEXT: br label [[OMP_PRECOND_END]]
// CHECK45-32: omp.precond.end:
-// CHECK45-32-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i32 4)
// CHECK45-32-NEXT: ret void
//
//
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
-// CHECK45-32-SAME: (i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK45-32-SAME: (i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK45-32-NEXT: entry:
// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
@@ -838,7 +834,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
@@ -984,7 +980,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
@@ -1105,7 +1101,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
+// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
@@ -1255,7 +1251,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
+// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
@@ -1273,34 +1269,33 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[I4:%.*]] = alloca i32, align 4
+// CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK45-32-EX-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-32-EX-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK45-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK45-32-EX-NEXT: store i32 0, ptr [[I]], align 4
// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK45-32-EX: omp.precond.then:
// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
// CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -1310,11 +1305,11 @@ int bar(int n){
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-32-EX: omp.dispatch.cond:
// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK45-32-EX-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK45-32-EX-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK45-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-32-EX: cond.true:
-// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK45-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK45-32-EX: cond.false:
// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
@@ -1326,44 +1321,44 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
-// CHECK45-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
+// CHECK45-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
+// CHECK45-32-EX-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-32-EX: omp.dispatch.body:
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32-EX: omp.inner.for.cond:
// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]]
// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK45-32-EX-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
-// CHECK45-32-EX-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK45-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
+// CHECK45-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32-EX: omp.inner.for.body:
// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]]
// CHECK45-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-32-EX-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32-EX: omp.body.continue:
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32-EX: omp.inner.for.inc:
// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK45-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK45-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK45-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
+// CHECK45-32-EX-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
// CHECK45-32-EX: omp.inner.for.end:
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-32-EX: omp.dispatch.inc:
// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4
+// CHECK45-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-32-EX: omp.dispatch.end:
// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
@@ -1374,11 +1369,11 @@ int bar(int n){
// CHECK45-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-32-EX: .omp.final.then:
// CHECK45-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-32-EX-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0
-// CHECK45-32-EX-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
-// CHECK45-32-EX-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1
-// CHECK45-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4
+// CHECK45-32-EX-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK45-32-EX-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
+// CHECK45-32-EX-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
+// CHECK45-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
// CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-32-EX: .omp.final.done:
// CHECK45-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
@@ -1391,12 +1386,11 @@ int bar(int n){
// CHECK45-32-EX: .omp.lastprivate.done:
// CHECK45-32-EX-NEXT: br label [[OMP_PRECOND_END]]
// CHECK45-32-EX: omp.precond.end:
-// CHECK45-32-EX-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i32 4)
// CHECK45-32-EX-NEXT: ret void
//
//
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
-// CHECK45-32-EX-SAME: (i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK45-32-EX-SAME: (i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK45-32-EX-NEXT: entry:
// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
@@ -1416,7 +1410,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
@@ -1562,7 +1556,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
@@ -1683,7 +1677,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
+// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
@@ -1833,7 +1827,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -1851,34 +1845,33 @@ int bar(int n){
// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I4:%.*]] = alloca i32, align 4
+// CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK-64-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-64-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: store i32 0, ptr [[I]], align 4
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-64: omp.precond.then:
// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -1888,11 +1881,11 @@ int bar(int n){
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK-64-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
@@ -1904,45 +1897,45 @@ int bar(int n){
// CHECK-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
-// CHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
+// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
+// CHECK-64-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]]
// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK-64-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
+// CHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-64-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
+// CHECK-64-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4
+// CHECK-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
@@ -1953,11 +1946,11 @@ int bar(int n){
// CHECK-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-64-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0
-// CHECK-64-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
-// CHECK-64-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1
-// CHECK-64-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]]
-// CHECK-64-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4
+// CHECK-64-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK-64-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
+// CHECK-64-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
+// CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK-64-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
@@ -1970,12 +1963,11 @@ int bar(int n){
// CHECK-64: .omp.lastprivate.done:
// CHECK-64-NEXT: br label [[OMP_PRECOND_END]]
// CHECK-64: omp.precond.end:
-// CHECK-64-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i64 4)
// CHECK-64-NEXT: ret void
//
//
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
-// CHECK-64-SAME: (i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK-64-SAME: (i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
@@ -1995,7 +1987,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -2142,7 +2134,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -2264,7 +2256,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -2416,7 +2408,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -2434,34 +2426,33 @@ int bar(int n){
// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I4:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK-32-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-32-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: store i32 0, ptr [[I]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-32: omp.precond.then:
// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -2471,11 +2462,11 @@ int bar(int n){
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK-32-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
@@ -2487,44 +2478,44 @@ int bar(int n){
// CHECK-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
+// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
+// CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]]
// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK-32-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
+// CHECK-32-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]]
// CHECK-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-32-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
+// CHECK-32-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4
+// CHECK-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
@@ -2535,11 +2526,11 @@ int bar(int n){
// CHECK-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-32-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0
-// CHECK-32-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
-// CHECK-32-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1
-// CHECK-32-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]]
-// CHECK-32-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4
+// CHECK-32-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK-32-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
+// CHECK-32-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
+// CHECK-32-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK-32-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
@@ -2552,12 +2543,11 @@ int bar(int n){
// CHECK-32: .omp.lastprivate.done:
// CHECK-32-NEXT: br label [[OMP_PRECOND_END]]
// CHECK-32: omp.precond.end:
-// CHECK-32-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i32 4)
// CHECK-32-NEXT: ret void
//
//
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
-// CHECK-32-SAME: (i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK-32-SAME: (i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
@@ -2577,7 +2567,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -2723,7 +2713,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -2844,7 +2834,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -2994,7 +2984,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -3012,34 +3002,33 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I4:%.*]] = alloca i32, align 4
+// CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK-32-EX-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-32-EX-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-EX-NEXT: store i32 0, ptr [[I]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-32-EX: omp.precond.then:
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
@@ -3049,11 +3038,11 @@ int bar(int n){
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK-32-EX-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
@@ -3065,44 +3054,44 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
+// CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
+// CHECK-32-EX-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]]
// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK-32-EX-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
+// CHECK-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]]
// CHECK-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-32-EX-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
-// CHECK-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
+// CHECK-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
+// CHECK-32-EX-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4
+// CHECK-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
@@ -3113,11 +3102,11 @@ int bar(int n){
// CHECK-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-32-EX-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0
-// CHECK-32-EX-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
-// CHECK-32-EX-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1
-// CHECK-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4
+// CHECK-32-EX-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK-32-EX-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
+// CHECK-32-EX-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
+// CHECK-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK-32-EX-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
@@ -3130,12 +3119,11 @@ int bar(int n){
// CHECK-32-EX: .omp.lastprivate.done:
// CHECK-32-EX-NEXT: br label [[OMP_PRECOND_END]]
// CHECK-32-EX: omp.precond.end:
-// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i32 4)
// CHECK-32-EX-NEXT: ret void
//
//
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
-// CHECK-32-EX-SAME: (i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK-32-EX-SAME: (i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
@@ -3155,7 +3143,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -3301,7 +3289,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -3422,7 +3410,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
More information about the cfe-commits
mailing list