[llvm-branch-commits] [llvm] [Attributor] Use `getAssumedAddrSpace` to get address space for `AllocaInst` (PR #136865)
Shilei Tian via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Apr 23 12:19:25 PDT 2025
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/136865
From c0e6a62c1c58763c9cf17327dc7fdaf1fd0840b7 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Wed, 23 Apr 2025 09:40:06 -0400
Subject: [PATCH] [Attributor] Use `getAssumedAddrSpace` to get address space
 for `AllocaInst`
---
.../Transforms/IPO/AttributorAttributes.cpp | 12 +
.../OpenMP/custom_state_machines.ll | 780 ++++++++++----
.../OpenMP/custom_state_machines_pre_lto.ll | 948 ++++++++++++++----
.../Transforms/OpenMP/nested_parallelism.ll | 28 +-
.../Transforms/OpenMP/remove_globalization.ll | 30 +-
llvm/test/Transforms/OpenMP/spmdization.ll | 408 +++++---
.../Transforms/OpenMP/spmdization_guarding.ll | 52 +-
...mdization_guarding_two_reaching_kernels.ll | 10 +
.../Transforms/OpenMP/spmdization_indirect.ll | 208 ++--
...zation_no_guarding_two_reaching_kernels.ll | 10 +
10 files changed, 1870 insertions(+), 616 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index ac56df3823e20..133233ded42e5 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12603,6 +12603,18 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
auto CheckAddressSpace = [&](Value &Obj) {
if (isa<UndefValue>(&Obj))
return true;
+ // Some targets relax the requirement that an alloca live in the alloca
+ // address space and accept allocas in certain other address spaces; such
+ // targets lower the alloca to the correct address space later in the
+ // pipeline. Therefore, query TTI for the address space the target assumes
+ // the alloca will end up in.
+ if (auto *AI = dyn_cast<AllocaInst>(&Obj)) {
+ Function *Fn = AI->getFunction();
+ auto *TTI =
+ A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(
+ *Fn);
+ return takeAddressSpace(TTI->getAssumedAddrSpace(AI));
+ }
// If an argument in flat address space only has addrspace cast uses, and
// those casts are same, then we take the dst addrspace.
if (auto *Arg = dyn_cast<Argument>(&Obj)) {
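(For context, a minimal sketch of the rewrite this enables; the reduced
example below is hypothetical and not part of the patch. On a target whose
getAssumedAddrSpace() reports addrspace(5) for allocas, as AMDGPU does here,
AAAddressSpace can now retarget accesses to a generic-pointer alloca through
an addrspacecast, which is the pattern visible throughout the updated CHECK
lines below.)

; Before (input IR; the module emits allocas as generic addrspace-0 pointers):
target triple = "amdgcn-amd-amdhsa"

define i32 @f() {
entry:
  %x = alloca i32, align 4
  store i32 42, ptr %x, align 4
  %v = load i32, ptr %x, align 4
  ret i32 %v
}

; After running the Attributor (e.g. `opt -passes=attributor -S`; assumed
; invocation), the accesses go through the assumed address space, matching
; the shape of the test updates below:
;
;   %x = alloca i32, align 4
;   %0 = addrspacecast ptr %x to ptr addrspace(5)
;   store i32 42, ptr addrspace(5) %0, align 4
;   %v = load i32, ptr addrspace(5) %0, align 4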
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
index 10e521bbfcc10..b0b7c26f55575 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
@@ -906,11 +906,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
-; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
@@ -929,6 +931,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; AMDGPU-NEXT: ret void
@@ -975,17 +979,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
-; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
+; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP1]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
+; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP1]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
-; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
@@ -999,12 +1004,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__2_wrapper.ID
; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; AMDGPU: worker_state_machine.parallel_region.execute:
-; AMDGPU-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU: worker_state_machine.parallel_region.check1:
; AMDGPU-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
; AMDGPU: worker_state_machine.parallel_region.execute2:
-; AMDGPU-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU: worker_state_machine.parallel_region.check3:
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
@@ -1012,13 +1017,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU: worker_state_machine.done.barrier:
-; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU: thread.user_code.check:
-; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
@@ -1030,9 +1036,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -1046,6 +1055,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; AMDGPU-NEXT: ret void
;
@@ -1058,6 +1069,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
@@ -1069,6 +1083,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1081,6 +1097,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
@@ -1093,17 +1112,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
-; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
+; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP1]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
+; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP1]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
-; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
@@ -1117,18 +1137,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__17_wrapper
; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; AMDGPU: worker_state_machine.parallel_region.execute:
-; AMDGPU-NEXT: call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU: worker_state_machine.parallel_region.check1:
; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__5_wrapper.ID
; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
; AMDGPU: worker_state_machine.parallel_region.execute2:
-; AMDGPU-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU: worker_state_machine.parallel_region.check3:
; AMDGPU-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]]
; AMDGPU: worker_state_machine.parallel_region.execute5:
-; AMDGPU-NEXT: call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU: worker_state_machine.parallel_region.check6:
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
@@ -1136,13 +1156,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU: worker_state_machine.done.barrier:
-; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU: thread.user_code.check:
-; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
@@ -1154,8 +1175,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -1190,6 +1214,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1202,6 +1228,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
@@ -1234,17 +1263,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
-; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
+; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP1]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
+; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP1]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
-; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
@@ -1258,28 +1288,29 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__7_wrapper.ID
; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; AMDGPU: worker_state_machine.parallel_region.execute:
-; AMDGPU-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU: worker_state_machine.parallel_region.check1:
; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__8_wrapper.ID
; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; AMDGPU: worker_state_machine.parallel_region.execute2:
-; AMDGPU-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
+; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU: worker_state_machine.parallel_region.end:
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU: worker_state_machine.done.barrier:
-; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU: thread.user_code.check:
-; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
@@ -1291,9 +1322,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -1306,6 +1340,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1318,6 +1354,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
@@ -1329,6 +1368,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1341,6 +1382,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
@@ -1353,17 +1397,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
-; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
+; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP1]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
+; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP1]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
-; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
@@ -1377,12 +1422,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__10_wrapper.ID
; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; AMDGPU: worker_state_machine.parallel_region.execute:
-; AMDGPU-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU: worker_state_machine.parallel_region.check1:
; AMDGPU-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
; AMDGPU: worker_state_machine.parallel_region.execute2:
-; AMDGPU-NEXT: call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU: worker_state_machine.parallel_region.check3:
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
@@ -1390,13 +1435,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU: worker_state_machine.done.barrier:
-; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU: thread.user_code.check:
-; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
@@ -1408,9 +1454,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -1423,6 +1472,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1435,6 +1486,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
@@ -1446,6 +1500,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1458,6 +1514,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
@@ -1470,17 +1529,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
-; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
+; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP1]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
+; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP1]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
-; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
@@ -1494,12 +1554,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__13_wrapper.ID
; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; AMDGPU: worker_state_machine.parallel_region.execute:
-; AMDGPU-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU: worker_state_machine.parallel_region.check1:
; AMDGPU-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
; AMDGPU: worker_state_machine.parallel_region.execute2:
-; AMDGPU-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU: worker_state_machine.parallel_region.check3:
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
@@ -1507,13 +1567,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU: worker_state_machine.done.barrier:
-; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU: thread.user_code.check:
-; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
@@ -1525,9 +1586,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -1540,6 +1604,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1552,6 +1618,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
@@ -1563,6 +1632,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1575,6 +1646,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
@@ -1586,11 +1660,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
-; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
@@ -1604,6 +1680,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; AMDGPU-NEXT: ret void
@@ -1614,15 +1692,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: store i32 [[A]], ptr addrspace(5) [[TMP0]], align 4
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4
+; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU: if.then:
; AMDGPU-NEXT: br label [[RETURN:%.*]]
; AMDGPU: if.end:
-; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
+; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 1
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
; AMDGPU-NEXT: br label [[RETURN]]
@@ -1658,17 +1739,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
-; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
+; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP1]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
+; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP1]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
-; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
@@ -1679,19 +1761,20 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU: worker_state_machine.is_active.check:
; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
+; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU: worker_state_machine.parallel_region.end:
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU: worker_state_machine.done.barrier:
-; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU: thread.user_code.check:
-; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
-; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
@@ -1705,6 +1788,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; AMDGPU-NEXT: ret void
;
@@ -1722,6 +1807,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1734,6 +1821,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
@@ -1745,6 +1835,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1757,6 +1849,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
@@ -1788,6 +1883,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1800,6 +1897,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
@@ -1811,11 +1911,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
-; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; NVPTX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
@@ -1834,6 +1936,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; NVPTX-NEXT: ret void
@@ -1880,17 +1984,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
-; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
+; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP1]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
+; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP1]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
-; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
@@ -1903,12 +2008,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__2_wrapper.ID
; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; NVPTX: worker_state_machine.parallel_region.execute:
-; NVPTX-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]])
+; NVPTX-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX: worker_state_machine.parallel_region.check1:
; NVPTX-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
; NVPTX: worker_state_machine.parallel_region.execute2:
-; NVPTX-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
+; NVPTX-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX: worker_state_machine.parallel_region.check3:
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
@@ -1916,13 +2021,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX: worker_state_machine.done.barrier:
-; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX: thread.user_code.check:
-; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
@@ -1934,9 +2040,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -1950,6 +2059,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; NVPTX-NEXT: ret void
;
@@ -1962,6 +2073,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
@@ -1973,6 +2087,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -1985,6 +2101,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
@@ -1997,17 +2116,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
-; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
+; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP1]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
+; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP1]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
-; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
@@ -2020,18 +2140,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__17_wrapper
; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; NVPTX: worker_state_machine.parallel_region.execute:
-; NVPTX-NEXT: call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]])
+; NVPTX-NEXT: call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX: worker_state_machine.parallel_region.check1:
; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__5_wrapper.ID
; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
; NVPTX: worker_state_machine.parallel_region.execute2:
-; NVPTX-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
+; NVPTX-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX: worker_state_machine.parallel_region.check3:
; NVPTX-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]]
; NVPTX: worker_state_machine.parallel_region.execute5:
-; NVPTX-NEXT: call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]])
+; NVPTX-NEXT: call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX: worker_state_machine.parallel_region.check6:
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
@@ -2039,13 +2159,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX: worker_state_machine.done.barrier:
-; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX: thread.user_code.check:
-; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
@@ -2057,8 +2178,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -2093,6 +2217,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2105,6 +2231,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
@@ -2137,17 +2266,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
-; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
+; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP1]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
+; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP1]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
-; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
@@ -2160,28 +2290,29 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__7_wrapper.ID
; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; NVPTX: worker_state_machine.parallel_region.execute:
-; NVPTX-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
+; NVPTX-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX: worker_state_machine.parallel_region.check1:
; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__8_wrapper.ID
; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; NVPTX: worker_state_machine.parallel_region.execute2:
-; NVPTX-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]])
+; NVPTX-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
+; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX: worker_state_machine.parallel_region.end:
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX: worker_state_machine.done.barrier:
-; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX: thread.user_code.check:
-; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
@@ -2193,9 +2324,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -2208,6 +2342,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2220,6 +2356,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
@@ -2231,6 +2370,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2243,6 +2384,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
@@ -2255,17 +2399,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
-; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
+; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP1]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
+; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP1]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
-; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
@@ -2278,12 +2423,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__10_wrapper.ID
; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; NVPTX: worker_state_machine.parallel_region.execute:
-; NVPTX-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]])
+; NVPTX-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX: worker_state_machine.parallel_region.check1:
; NVPTX-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
; NVPTX: worker_state_machine.parallel_region.execute2:
-; NVPTX-NEXT: call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]])
+; NVPTX-NEXT: call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX: worker_state_machine.parallel_region.check3:
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
@@ -2291,13 +2436,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX: worker_state_machine.done.barrier:
-; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX: thread.user_code.check:
-; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
@@ -2309,9 +2455,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -2324,6 +2473,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2336,6 +2487,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
@@ -2347,6 +2501,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2359,6 +2515,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
@@ -2371,17 +2530,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
-; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
+; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP1]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
+; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP1]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
-; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
@@ -2394,12 +2554,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], @__omp_outlined__13_wrapper.ID
; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; NVPTX: worker_state_machine.parallel_region.execute:
-; NVPTX-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]])
+; NVPTX-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX: worker_state_machine.parallel_region.check1:
; NVPTX-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
; NVPTX: worker_state_machine.parallel_region.execute2:
-; NVPTX-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]])
+; NVPTX-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX: worker_state_machine.parallel_region.check3:
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
@@ -2407,13 +2567,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX: worker_state_machine.done.barrier:
-; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX: thread.user_code.check:
-; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
@@ -2425,9 +2586,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__12
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -2440,6 +2604,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2452,6 +2618,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
@@ -2463,6 +2632,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2475,6 +2646,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
@@ -2486,11 +2660,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
-; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
@@ -2504,6 +2680,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; NVPTX-NEXT: ret void
@@ -2514,15 +2692,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: store i32 [[A]], ptr addrspace(5) [[TMP0]], align 4
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4
+; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX: if.then:
; NVPTX-NEXT: br label [[RETURN:%.*]]
; NVPTX: if.end:
-; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
+; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 1
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
; NVPTX-NEXT: br label [[RETURN]]
@@ -2558,17 +2739,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
-; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
+; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP1]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
+; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP1]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
-; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
@@ -2578,19 +2760,20 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX: worker_state_machine.is_active.check:
; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
+; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX: worker_state_machine.parallel_region.end:
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX: worker_state_machine.done.barrier:
-; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP1]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX: thread.user_code.check:
-; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
-; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
@@ -2604,6 +2787,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; NVPTX-NEXT: ret void
;
@@ -2621,6 +2806,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2633,6 +2820,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
@@ -2644,6 +2834,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2656,6 +2848,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
@@ -2687,6 +2882,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2699,6 +2896,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
@@ -2710,11 +2910,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
-; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
+; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
@@ -2733,6 +2935,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -2778,11 +2982,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
-; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
+; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
@@ -2794,9 +3000,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
+; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -2810,6 +3019,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -2822,6 +3033,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -2833,6 +3047,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -2845,6 +3061,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -2856,11 +3075,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
-; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
+; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
@@ -2872,8 +3093,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
+; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -2908,6 +3132,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -2920,6 +3146,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -2951,11 +3180,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
-; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
+; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
@@ -2967,9 +3198,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
+; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -2982,6 +3216,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -2994,6 +3230,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -3005,6 +3244,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3017,6 +3258,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -3028,11 +3272,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
-; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
+; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
@@ -3044,9 +3290,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
+; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -3059,6 +3308,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3071,6 +3322,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -3082,6 +3336,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3094,6 +3350,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -3105,11 +3364,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
-; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
+; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
@@ -3121,9 +3382,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
+; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -3136,6 +3400,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3148,6 +3414,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -3159,6 +3428,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3171,6 +3442,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -3182,11 +3456,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
-; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
+; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
@@ -3200,6 +3476,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -3210,15 +3488,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr addrspace(5) [[TMP0]], align 4
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4
+; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU-DISABLED: if.then:
; AMDGPU-DISABLED-NEXT: br label [[RETURN:%.*]]
; AMDGPU-DISABLED: if.end:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
+; AMDGPU-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 1
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
; AMDGPU-DISABLED-NEXT: br label [[RETURN]]
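The hunk above is the clearest illustration of what the updated checks encode: accesses through a flat-pointer `alloca` are rewritten to go through the address space the target assumes for allocas (addrspace(5), AMDGPU's private/stack space). A minimal before/after sketch in LLVM IR of the pattern these checks verify — value names here are hypothetical, not taken from the test:

  ; before: flat-pointer access to the alloca
  %a.addr = alloca i32, align 4
  store i32 %a, ptr %a.addr, align 4
  %v = load i32, ptr %a.addr, align 4

  ; after: each access is routed through the assumed address space
  %a.addr = alloca i32, align 4
  %cast = addrspacecast ptr %a.addr to ptr addrspace(5)
  store i32 %a, ptr addrspace(5) %cast, align 4
  %v = load i32, ptr addrspace(5) %cast, align 4

(In the actual output above, a separate cast is materialized before each access, which is why [[TMP0]], [[TMP1]], and [[TMP3]] all cast the same [[A_ADDR]].)
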
@@ -3253,11 +3534,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
-; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
+; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
@@ -3271,6 +3554,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3288,6 +3573,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3300,6 +3587,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -3311,6 +3601,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3323,6 +3615,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -3354,6 +3649,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3366,6 +3663,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -3377,11 +3677,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
-; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
+; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
@@ -3400,6 +3702,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; NVPTX-DISABLED-NEXT: ret void
@@ -3445,11 +3749,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
-; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
+; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
@@ -3461,9 +3767,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
+; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -3477,6 +3786,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3489,6 +3800,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
@@ -3500,6 +3814,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3512,6 +3828,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
@@ -3523,11 +3842,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
-; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
+; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
@@ -3539,8 +3860,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
+; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -3575,6 +3899,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3587,6 +3913,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
@@ -3618,11 +3947,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
-; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
+; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
@@ -3634,9 +3965,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
+; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -3649,6 +3983,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3661,6 +3997,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
@@ -3672,6 +4011,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3684,6 +4025,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
@@ -3695,11 +4039,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
-; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
+; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
@@ -3711,9 +4057,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
+; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -3726,6 +4075,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3738,6 +4089,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
@@ -3749,6 +4103,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3761,6 +4117,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
@@ -3772,11 +4131,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
-; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
+; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
@@ -3788,9 +4149,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
+; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -3803,6 +4167,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3815,6 +4181,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
@@ -3826,6 +4195,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3838,6 +4209,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
@@ -3849,11 +4223,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
-; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
+; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
@@ -3867,6 +4243,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; NVPTX-DISABLED-NEXT: ret void
@@ -3877,15 +4255,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: store i32 [[A]], ptr addrspace(5) [[TMP0]], align 4
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4
+; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX-DISABLED: if.then:
; NVPTX-DISABLED-NEXT: br label [[RETURN:%.*]]
; NVPTX-DISABLED: if.end:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
+; NVPTX-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 1
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
; NVPTX-DISABLED-NEXT: br label [[RETURN]]
@@ -3920,11 +4301,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
-; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
+; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
@@ -3938,6 +4321,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3955,6 +4340,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3967,6 +4354,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
@@ -3978,6 +4368,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3990,6 +4382,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
@@ -4021,6 +4416,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -4033,6 +4430,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
index 9576ff6ca6aee..fa36db3ccef3f 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
@@ -915,11 +915,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
+; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; AMDGPU1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
@@ -933,6 +935,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; AMDGPU1-NEXT: ret void
@@ -978,11 +982,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
+; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
@@ -994,9 +1000,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
+; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -1010,6 +1019,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; AMDGPU1-NEXT: ret void
;
@@ -1022,6 +1033,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
@@ -1033,6 +1047,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1045,6 +1061,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
@@ -1056,11 +1075,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
+; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
@@ -1072,8 +1093,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
+; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -1108,6 +1132,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1120,6 +1146,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
@@ -1151,11 +1180,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
+; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
@@ -1167,9 +1198,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
+; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -1182,6 +1216,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1194,6 +1230,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
@@ -1205,6 +1244,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1217,6 +1258,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
@@ -1228,11 +1272,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
+; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
@@ -1244,9 +1290,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
+; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -1259,6 +1308,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1271,6 +1322,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
@@ -1282,6 +1336,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1294,6 +1350,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
@@ -1305,11 +1364,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
+; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
@@ -1321,9 +1382,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__12
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
+; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -1336,6 +1400,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1348,6 +1414,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
@@ -1359,6 +1428,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1371,6 +1442,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
@@ -1382,11 +1456,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
+; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
@@ -1400,6 +1476,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; AMDGPU1-NEXT: ret void
@@ -1410,15 +1488,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: store i32 [[A]], ptr addrspace(5) [[TMP0]], align 4
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4
+; AMDGPU1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
; AMDGPU1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU1: if.then:
; AMDGPU1-NEXT: br label [[RETURN:%.*]]
; AMDGPU1: if.end:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
+; AMDGPU1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 1
; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
; AMDGPU1-NEXT: br label [[RETURN]]
@@ -1453,11 +1534,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
+; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
@@ -1471,6 +1554,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; AMDGPU1-NEXT: ret void
;
@@ -1488,6 +1573,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1500,6 +1587,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
@@ -1511,6 +1601,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1523,6 +1615,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
@@ -1554,6 +1649,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: entry:
; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1566,6 +1663,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
@@ -1577,11 +1677,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
+; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; NVPTX1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
@@ -1595,6 +1697,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; NVPTX1-NEXT: ret void
@@ -1640,11 +1744,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
+; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
@@ -1656,9 +1762,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
+; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -1672,6 +1781,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; NVPTX1-NEXT: ret void
;
@@ -1684,6 +1795,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
@@ -1695,6 +1809,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -1707,6 +1823,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
@@ -1718,11 +1837,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
+; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
@@ -1734,8 +1855,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
+; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -1770,6 +1894,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -1782,6 +1908,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
@@ -1813,11 +1942,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
+; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
@@ -1829,9 +1960,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
+; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -1844,6 +1978,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -1856,6 +1992,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
@@ -1867,6 +2006,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -1879,6 +2020,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
@@ -1890,11 +2034,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
+; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
@@ -1906,9 +2052,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
+; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -1921,6 +2070,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -1933,6 +2084,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
@@ -1944,6 +2098,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -1956,6 +2112,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
@@ -1967,11 +2126,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
+; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
@@ -1983,9 +2144,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__12
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
+; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -1998,6 +2162,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -2010,6 +2176,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
@@ -2021,6 +2190,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -2033,6 +2204,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
@@ -2044,11 +2218,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
+; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
@@ -2062,6 +2238,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; NVPTX1-NEXT: ret void
@@ -2072,15 +2250,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: store i32 [[A]], ptr addrspace(5) [[TMP0]], align 4
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4
+; NVPTX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
; NVPTX1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX1: if.then:
; NVPTX1-NEXT: br label [[RETURN:%.*]]
; NVPTX1: if.end:
-; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
+; NVPTX1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 1
; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
; NVPTX1-NEXT: br label [[RETURN]]
@@ -2115,11 +2296,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
+; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
@@ -2133,6 +2316,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; NVPTX1-NEXT: ret void
;
@@ -2150,6 +2335,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -2162,6 +2349,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
@@ -2173,6 +2363,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -2185,6 +2377,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
@@ -2216,6 +2411,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: entry:
; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -2228,6 +2425,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
@@ -2239,11 +2439,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
+; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; AMDGPU2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
@@ -2257,6 +2459,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; AMDGPU2-NEXT: ret void
@@ -2302,11 +2506,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
+; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
@@ -2318,9 +2524,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
+; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -2334,6 +2543,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; AMDGPU2-NEXT: ret void
;
@@ -2346,6 +2557,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
@@ -2357,6 +2571,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2369,6 +2585,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
@@ -2380,11 +2599,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
+; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
@@ -2396,8 +2617,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
+; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -2432,6 +2656,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2444,6 +2670,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
@@ -2475,11 +2704,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
+; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
@@ -2491,9 +2722,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
+; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -2506,6 +2740,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2518,6 +2754,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
@@ -2529,6 +2768,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2541,6 +2782,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
@@ -2552,11 +2796,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
+; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
@@ -2568,9 +2814,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
+; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -2583,6 +2832,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2595,6 +2846,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
@@ -2606,6 +2860,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2618,6 +2874,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
@@ -2629,11 +2888,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
+; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
@@ -2645,9 +2906,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__12
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
+; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -2660,6 +2924,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2672,6 +2938,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
@@ -2683,6 +2952,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2695,6 +2966,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
@@ -2706,11 +2980,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
+; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
@@ -2724,6 +3000,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; AMDGPU2-NEXT: ret void
@@ -2734,15 +3012,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: store i32 [[A]], ptr addrspace(5) [[TMP0]], align 4
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4
+; AMDGPU2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
; AMDGPU2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU2: if.then:
; AMDGPU2-NEXT: br label [[RETURN:%.*]]
; AMDGPU2: if.end:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
+; AMDGPU2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 1
; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
; AMDGPU2-NEXT: br label [[RETURN]]
@@ -2777,11 +3058,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
+; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
@@ -2795,6 +3078,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; AMDGPU2-NEXT: ret void
;
@@ -2812,6 +3097,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2824,6 +3111,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
@@ -2835,6 +3125,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2847,6 +3139,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
@@ -2878,6 +3173,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: entry:
; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2890,6 +3187,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
@@ -2901,11 +3201,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
+; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; AMDGPU3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
@@ -2919,6 +3221,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; AMDGPU3-NEXT: ret void
@@ -2964,11 +3268,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
+; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
@@ -2980,9 +3286,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
+; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -2996,6 +3305,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; AMDGPU3-NEXT: ret void
;
@@ -3008,6 +3319,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
@@ -3019,6 +3333,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3031,6 +3347,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
@@ -3042,11 +3361,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
+; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
@@ -3058,8 +3379,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
+; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -3094,6 +3418,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3106,6 +3432,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
@@ -3137,11 +3466,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
+; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
@@ -3153,9 +3484,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
+; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -3168,6 +3502,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3180,6 +3516,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
@@ -3191,6 +3530,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3203,6 +3544,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
@@ -3214,11 +3558,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
+; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
@@ -3230,9 +3576,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
+; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -3245,6 +3594,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3257,6 +3608,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
@@ -3268,6 +3622,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3280,6 +3636,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
@@ -3291,11 +3650,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
+; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
@@ -3307,9 +3668,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__12
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
+; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -3322,6 +3686,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3334,6 +3700,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
@@ -3345,6 +3714,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3357,6 +3728,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
@@ -3368,11 +3742,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
+; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
@@ -3386,6 +3762,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; AMDGPU3-NEXT: ret void
@@ -3396,15 +3774,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: store i32 [[A]], ptr addrspace(5) [[TMP0]], align 4
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4
+; AMDGPU3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
; AMDGPU3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU3: if.then:
; AMDGPU3-NEXT: br label [[RETURN:%.*]]
; AMDGPU3: if.end:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
+; AMDGPU3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 1
; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
; AMDGPU3-NEXT: br label [[RETURN]]
@@ -3439,11 +3820,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
+; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
@@ -3457,6 +3840,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; AMDGPU3-NEXT: ret void
;
@@ -3474,6 +3859,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3486,6 +3873,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
@@ -3497,6 +3887,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3509,6 +3901,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
@@ -3540,6 +3935,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: entry:
; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3552,6 +3949,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; AMDGPU3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
@@ -3563,11 +3963,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
+; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; NVPTX2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
@@ -3581,6 +3983,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; NVPTX2-NEXT: ret void
@@ -3626,11 +4030,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
+; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
@@ -3642,9 +4048,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
+; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -3658,6 +4067,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; NVPTX2-NEXT: ret void
;
@@ -3670,6 +4081,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
@@ -3681,6 +4095,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -3693,6 +4109,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
@@ -3704,11 +4123,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
+; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
@@ -3720,8 +4141,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
+; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -3756,6 +4180,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -3768,6 +4194,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
@@ -3799,11 +4228,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
+; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
@@ -3815,9 +4246,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
+; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -3830,6 +4264,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -3842,6 +4278,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
@@ -3853,6 +4292,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -3865,6 +4306,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
@@ -3876,11 +4320,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
+; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
@@ -3892,9 +4338,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
+; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -3907,6 +4356,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -3919,6 +4370,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
@@ -3930,6 +4384,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -3942,6 +4398,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
@@ -3953,11 +4412,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
+; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
@@ -3969,9 +4430,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__12
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
+; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -3984,6 +4448,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -3996,6 +4462,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
@@ -4007,6 +4476,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -4019,6 +4490,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
@@ -4030,11 +4504,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
+; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
@@ -4048,6 +4524,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; NVPTX2-NEXT: ret void
@@ -4058,15 +4536,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: store i32 [[A]], ptr addrspace(5) [[TMP0]], align 4
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4
+; NVPTX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
; NVPTX2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX2: if.then:
; NVPTX2-NEXT: br label [[RETURN:%.*]]
; NVPTX2: if.end:
-; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
+; NVPTX2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 1
; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
; NVPTX2-NEXT: br label [[RETURN]]
@@ -4101,11 +4582,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
+; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
@@ -4119,6 +4602,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; NVPTX2-NEXT: ret void
;
@@ -4136,6 +4621,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -4148,6 +4635,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
@@ -4159,6 +4649,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -4171,6 +4663,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
@@ -4202,6 +4697,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: entry:
; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -4214,6 +4711,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
@@ -4225,11 +4725,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
+; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; NVPTX3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
@@ -4243,6 +4745,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; NVPTX3-NEXT: ret void
@@ -4288,11 +4792,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
+; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
@@ -4304,9 +4810,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
+; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -4320,6 +4829,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; NVPTX3-NEXT: ret void
;
@@ -4332,6 +4843,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
@@ -4343,6 +4857,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4355,6 +4871,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
@@ -4366,11 +4885,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
+; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
@@ -4382,8 +4903,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
+; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
@@ -4418,6 +4942,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4430,6 +4956,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
@@ -4461,11 +4990,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
+; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
@@ -4477,9 +5008,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
+; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -4492,6 +5026,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4504,6 +5040,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
@@ -4515,6 +5054,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4527,6 +5068,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
@@ -4538,11 +5082,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
+; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
@@ -4554,9 +5100,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
+; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -4569,6 +5118,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4581,6 +5132,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
@@ -4592,6 +5146,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4604,6 +5160,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
@@ -4615,11 +5174,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
+; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
@@ -4631,9 +5192,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__12
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
+; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
@@ -4646,6 +5210,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4658,6 +5224,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
@@ -4669,6 +5238,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4681,6 +5252,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
@@ -4692,11 +5266,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
+; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
@@ -4710,6 +5286,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; NVPTX3-NEXT: ret void
@@ -4720,15 +5298,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: store i32 [[A]], ptr addrspace(5) [[TMP0]], align 4
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4
+; NVPTX3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0
; NVPTX3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX3: if.then:
; NVPTX3-NEXT: br label [[RETURN:%.*]]
; NVPTX3: if.end:
-; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
+; NVPTX3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 1
; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
; NVPTX3-NEXT: br label [[RETURN]]
@@ -4763,11 +5344,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
+; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
@@ -4781,6 +5364,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; NVPTX3-NEXT: ret void
;
@@ -4798,6 +5383,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4810,6 +5397,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
@@ -4821,6 +5411,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4833,6 +5425,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
@@ -4864,6 +5459,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: entry:
; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4876,6 +5473,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX3-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; NVPTX3-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
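All of the NVPTX2/NVPTX3 check updates above follow one mechanical pattern:
each flat-pointer alloca gains an addrspacecast to ptr addrspace(5), and the
TMP values are renumbered to make room for the new casts. A minimal
before/after sketch of that pattern -- the function names and values below
are illustrative, not taken from the tests:

  ; Before: the alloca is only ever accessed through flat (generic) pointers.
  define void @example_before() {
  entry:
    %x = alloca i32, align 4
    store i32 0, ptr %x, align 4
    ret void
  }

  ; After: the assumed address space (5 on these targets) is materialized as
  ; a cast, and the access is rewritten to go through it.
  define void @example_after() {
  entry:
    %x = alloca i32, align 4
    %x.cast = addrspacecast ptr %x to ptr addrspace(5)
    store i32 0, ptr addrspace(5) %x.cast, align 4
    ret void
  }
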
diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
index 1679a27fdae8b..834c623d27695 100644
--- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll
+++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
@@ -64,7 +64,8 @@ define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l13
; CHECK-NEXT: br label [[_Z3FOOI_INTERNALIZED_EXIT]]
; CHECK: _Z3fooi.internalized.exit:
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
-; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr addrspace(5) [[TMP4]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: call void @__kmpc_target_deinit()
@@ -109,7 +110,8 @@ define hidden void @_Z3fooi(i32 noundef %i1) local_unnamed_addr #1 {
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[I1:%.*]], ptr [[I]], align 16
-; CHECK-NEXT: store ptr [[I]], ptr [[CAPTURED_VARS_ADDRS]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: ret void
@@ -141,7 +143,8 @@ define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l16
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i.i_shared, align 16
-; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: call void @__kmpc_target_deinit()
@@ -175,7 +178,8 @@ define hidden void @_Z4foo1i(i32 noundef %i1) local_unnamed_addr #1 {
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[I1:%.*]], ptr [[I]], align 16
-; CHECK-NEXT: store ptr [[I]], ptr [[CAPTURED_VARS_ADDRS]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: ret void
@@ -202,7 +206,8 @@ define internal void @__omp_outlined__(ptr noalias nocapture readnone %.global_t
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I_I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[TMP0]], ptr [[I_I]], align 16
-; CHECK-NEXT: store ptr [[I_I]], ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I_I]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I_I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
@@ -228,15 +233,17 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #5 {
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I_I:%.*]] = alloca [1 x ptr], align 8
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]])
-; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[TMP5]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]])
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I_I_I:%.*]] = call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[TMP4]], ptr [[I_I_I]], align 16
-; CHECK-NEXT: store ptr [[I_I_I]], ptr [[CAPTURED_VARS_ADDRS_I_I]], align 8
-; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]], i64 1)
+; CHECK-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I_I_I]], ptr addrspace(5) [[TMP7]], align 8
+; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I_I_I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]])
; CHECK-NEXT: ret void
@@ -287,7 +294,8 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #5 {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]])
-; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[TMP5]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: [[INC_I:%.*]] = add nsw i32 [[TMP4]], 1
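The nested_parallelism.ll updates above show the same rewrite applied to the
captured-variables buffers: the stored value is still a flat pointer
(including constant addrspacecasts from shared memory), but the store itself
now goes through the stack slot's assumed address space. A hedged sketch,
where @g_shared and @caller are placeholders rather than symbols from the
test:

  @g_shared = addrspace(3) global i32 0

  define void @caller() {
  entry:
    %captured = alloca [1 x ptr], align 8
    ; The flat pointer into shared memory is unchanged; only the destination
    ; of the store is rewritten to addrspace(5).
    %captured.cast = addrspacecast ptr %captured to ptr addrspace(5)
    store ptr addrspacecast (ptr addrspace(3) @g_shared to ptr), ptr addrspace(5) %captured.cast, align 8
    ret void
  }
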
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index 29f2030c4d42b..17d17fe55d42f 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -163,17 +163,22 @@ define internal void @convert_and_move_alloca() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[UB_PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[UB_PTR]] to ptr addrspace(5)
; CHECK-NEXT: br label [[INITLOOP:%.*]]
; CHECK: initloop:
-; CHECK-NEXT: store i32 0, ptr [[IV_PTR]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT: store i32 0, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: br label [[LOOPBODY:%.*]]
; CHECK: loopbody:
-; CHECK-NEXT: [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
-; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
-; CHECK-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT: [[IV:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[IV]], 10
+; CHECK-NEXT: br i1 [[TMP3]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
; CHECK: loopinc:
; CHECK-NEXT: [[INC:%.*]] = add i32 [[IV]], 1
-; CHECK-NEXT: store i32 [[INC]], ptr [[IV_PTR]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT: store i32 [[INC]], ptr addrspace(5) [[TMP4]], align 4
; CHECK-NEXT: br label [[LOOPBODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
@@ -183,17 +188,22 @@ define internal void @convert_and_move_alloca() {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-DISABLED-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
+; CHECK-DISABLED-NEXT: [[UB_PTR:%.*]] = alloca i32, align 4
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[UB_PTR]] to ptr addrspace(5)
; CHECK-DISABLED-NEXT: br label [[INITLOOP:%.*]]
; CHECK-DISABLED: initloop:
-; CHECK-DISABLED-NEXT: store i32 0, ptr [[IV_PTR]], align 4
+; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT: store i32 0, ptr addrspace(5) [[TMP1]], align 4
; CHECK-DISABLED-NEXT: br label [[LOOPBODY:%.*]]
; CHECK-DISABLED: loopbody:
-; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
-; CHECK-DISABLED-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
+; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
+; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[IV]], 10
+; CHECK-DISABLED-NEXT: br i1 [[TMP3]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
; CHECK-DISABLED: loopinc:
; CHECK-DISABLED-NEXT: [[INC:%.*]] = add i32 [[IV]], 1
-; CHECK-DISABLED-NEXT: store i32 [[INC]], ptr [[IV_PTR]], align 4
+; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT: store i32 [[INC]], ptr addrspace(5) [[TMP4]], align 4
; CHECK-DISABLED-NEXT: br label [[LOOPBODY]]
; CHECK-DISABLED: exit:
; CHECK-DISABLED-NEXT: ret void
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index 1a629ecfee06d..c364177f01e53 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -234,7 +234,9 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -251,7 +253,9 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -302,7 +306,9 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -319,7 +325,9 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -369,7 +377,9 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -386,7 +396,9 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -425,8 +437,9 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
@@ -443,8 +456,9 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
@@ -461,8 +475,9 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
@@ -479,8 +494,9 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
@@ -497,8 +513,9 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
@@ -515,8 +532,9 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
@@ -591,6 +609,8 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
@@ -601,6 +621,8 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
@@ -611,6 +633,8 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
@@ -621,6 +645,8 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
@@ -631,6 +657,8 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
@@ -641,6 +669,8 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
@@ -670,7 +700,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -687,7 +719,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -738,7 +772,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -755,7 +791,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -805,7 +843,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -822,7 +862,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -864,8 +906,9 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
@@ -884,8 +927,9 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
@@ -905,8 +949,9 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
@@ -926,8 +971,9 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
@@ -946,8 +992,9 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
@@ -966,8 +1013,9 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
@@ -1044,6 +1092,8 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
@@ -1054,6 +1104,8 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
@@ -1064,6 +1116,8 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
@@ -1074,6 +1128,8 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
@@ -1084,6 +1140,8 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
@@ -1094,6 +1152,8 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
@@ -1124,7 +1184,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -1141,7 +1203,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -1192,7 +1256,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -1209,7 +1275,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -1259,7 +1327,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -1276,7 +1346,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -1315,9 +1387,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[TMP0]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
@@ -1334,9 +1408,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[TMP0]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
@@ -1353,9 +1429,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[TMP0]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
@@ -1372,9 +1450,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[TMP0]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
@@ -1391,9 +1471,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[TMP0]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
@@ -1410,9 +1492,11 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[TMP0]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
@@ -1511,10 +1595,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP5:%.*]] = load ptr, ptr addrspace(5) [[TMP4]], align 8
+; AMDGPU-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP6]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
@@ -1523,10 +1610,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP5:%.*]] = load ptr, ptr addrspace(5) [[TMP4]], align 8
+; NVPTX-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20]]
+; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP6]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
@@ -1535,10 +1625,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; AMDGPU-DISABLED1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP5:%.*]] = load ptr, ptr addrspace(5) [[TMP4]], align 8
+; AMDGPU-DISABLED1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP6]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
@@ -1547,10 +1640,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; AMDGPU-DISABLED2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP5:%.*]] = load ptr, ptr addrspace(5) [[TMP4]], align 8
+; AMDGPU-DISABLED2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP6]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
@@ -1559,10 +1655,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; NVPTX-DISABLED1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP5:%.*]] = load ptr, ptr addrspace(5) [[TMP4]], align 8
+; NVPTX-DISABLED1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP6]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
@@ -1571,10 +1670,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; NVPTX-DISABLED2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP5:%.*]] = load ptr, ptr addrspace(5) [[TMP4]], align 8
+; NVPTX-DISABLED2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP6]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
@@ -1604,7 +1706,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -1621,7 +1725,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -1672,7 +1778,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -1689,7 +1797,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -1739,7 +1849,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -1756,7 +1868,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -1809,9 +1923,11 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[TMP2]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
@@ -1842,9 +1958,11 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
-; NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[TMP2]], align 8, !tbaa [[TBAA20]]
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
@@ -1862,9 +1980,11 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[TMP0]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
@@ -1882,9 +2002,11 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[TMP0]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
@@ -1902,9 +2024,11 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[TMP0]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
@@ -1922,9 +2046,11 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[TMP0]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
@@ -2024,10 +2150,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP5:%.*]] = load ptr, ptr addrspace(5) [[TMP4]], align 8
+; AMDGPU-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP6]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
@@ -2036,10 +2165,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP5:%.*]] = load ptr, ptr addrspace(5) [[TMP4]], align 8
+; NVPTX-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20]]
+; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP6]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
@@ -2048,10 +2180,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; AMDGPU-DISABLED1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP5:%.*]] = load ptr, ptr addrspace(5) [[TMP4]], align 8
+; AMDGPU-DISABLED1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP6]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
@@ -2060,10 +2195,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; AMDGPU-DISABLED2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP5:%.*]] = load ptr, ptr addrspace(5) [[TMP4]], align 8
+; AMDGPU-DISABLED2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP6]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
@@ -2072,10 +2210,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; NVPTX-DISABLED1-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP5:%.*]] = load ptr, ptr addrspace(5) [[TMP4]], align 8
+; NVPTX-DISABLED1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP6]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
@@ -2084,10 +2225,13 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; NVPTX-DISABLED2-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP5:%.*]] = load ptr, ptr addrspace(5) [[TMP4]], align 8
+; NVPTX-DISABLED2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP6]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
@@ -2147,6 +2291,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -2192,6 +2338,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -2238,6 +2386,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -2254,6 +2404,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -2299,6 +2451,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
@@ -2315,6 +2469,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
@@ -2816,6 +2972,8 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
@@ -2826,6 +2984,8 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
@@ -2836,6 +2996,8 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
@@ -2846,6 +3008,8 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
@@ -2856,6 +3020,8 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
@@ -2866,6 +3032,8 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
index 2f1aadc073142..10804a5e270f1 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -72,17 +72,18 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK: user_code.entry:
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[N]], 42
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[C]], ptr [[AL32]], ptr addrspacecast (ptr addrspace(5) @LocGlob to ptr)
-; CHECK-NEXT: store ptr [[SELECT]], ptr [[LOC]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[LOC]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[SELECT]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
; CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[N]], 32
; CHECK-NEXT: [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
; CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]]
; CHECK: region.check.tid:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
@@ -91,7 +92,7 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK: region.guarded.end:
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
-; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP2]])
+; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP3]])
; CHECK-NEXT: br label [[REGION_EXIT:%.*]]
; CHECK: region.exit:
; CHECK-NEXT: call void @usei8ptr(ptr captures(none) [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
@@ -107,18 +108,19 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID5:%.*]]
; CHECK: region.check.tid5:
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[TMP6]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
; CHECK: region.guarded4:
; CHECK-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END1:%.*]]
; CHECK: region.guarded.end1:
; CHECK-NEXT: br label [[REGION_BARRIER2]]
; CHECK: region.barrier2:
-; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP4]])
+; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP5]])
; CHECK-NEXT: br label [[REGION_EXIT3]]
; CHECK: region.exit3:
+; CHECK-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[SELECT]] to ptr addrspace(5)
; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
; CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: __omp_outlined__.exit:
@@ -128,16 +130,16 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID10:%.*]]
; CHECK: region.check.tid10:
-; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
-; CHECK-NEXT: br i1 [[TMP7]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[TMP9]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
; CHECK: region.guarded9:
; CHECK-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END6:%.*]]
; CHECK: region.guarded.end6:
; CHECK-NEXT: br label [[REGION_BARRIER7]]
; CHECK: region.barrier7:
-; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6]])
+; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP8]])
; CHECK-NEXT: br label [[REGION_EXIT8:%.*]]
; CHECK: region.exit8:
; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
@@ -145,16 +147,16 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID15:%.*]]
; CHECK: region.check.tid15:
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT: br i1 [[TMP11]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
; CHECK: region.guarded14:
; CHECK-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END11:%.*]]
; CHECK: region.guarded.end11:
; CHECK-NEXT: br label [[REGION_BARRIER12]]
; CHECK: region.barrier12:
-; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP8]])
+; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP10]])
; CHECK-NEXT: br label [[REGION_EXIT13:%.*]]
; CHECK: region.exit13:
; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
@@ -162,16 +164,16 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID20:%.*]]
; CHECK: region.check.tid20:
-; CHECK-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
-; CHECK-NEXT: br i1 [[TMP11]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
+; CHECK-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[TMP13]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
; CHECK: region.guarded19:
; CHECK-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END16:%.*]]
; CHECK: region.guarded.end16:
; CHECK-NEXT: br label [[REGION_BARRIER17]]
; CHECK: region.barrier17:
-; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP10]])
+; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP12]])
; CHECK-NEXT: br label [[REGION_EXIT18:%.*]]
; CHECK: region.exit18:
; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
@@ -228,8 +230,9 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-DISABLED: user_code.entry:
; CHECK-DISABLED-NEXT: [[C:%.*]] = icmp eq i64 [[N]], 42
; CHECK-DISABLED-NEXT: [[SELECT:%.*]] = select i1 [[C]], ptr [[AL32]], ptr addrspacecast (ptr addrspace(5) @LocGlob to ptr)
-; CHECK-DISABLED-NEXT: store ptr [[SELECT]], ptr [[LOC]], align 8
-; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
+; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[LOC]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT: store ptr [[SELECT]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
; CHECK-DISABLED-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
; CHECK-DISABLED-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
; CHECK-DISABLED-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
@@ -249,6 +252,7 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-DISABLED-NEXT: [[IDXPROM4_I:%.*]] = zext i32 [[I_0_I]] to i64
; CHECK-DISABLED-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[SELECT]] to ptr addrspace(5)
; CHECK-DISABLED-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
; CHECK-DISABLED-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-DISABLED: __omp_outlined__.exit:
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
index a644fe1b2f821..3a3ecafc32b86 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
@@ -195,6 +195,8 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; CHECK-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; CHECK-NEXT: ret void
;
@@ -203,6 +205,8 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
@@ -224,6 +228,9 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 {
; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-NEXT: ret void
@@ -235,6 +242,9 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 {
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLE-SPMDIZATION-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
diff --git a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
index ef8caf48e57b7..d61600fefbdd2 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
@@ -56,22 +56,24 @@ define internal void @spmd_callees__debug(i1 %c) {
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10:[0-9]+]]
-; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[TMP2]], align 4
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2
-; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2
-; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
-; AMDGPU: 3:
-; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
-; AMDGPU-NEXT: br label [[TMP7:%.*]]
-; AMDGPU: 4:
-; AMDGPU-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
+; AMDGPU-NEXT: [[TMP4:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2
+; AMDGPU-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]]
; AMDGPU: 5:
-; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
-; AMDGPU-NEXT: br label [[TMP7]]
+; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; AMDGPU-NEXT: br label [[TMP9:%.*]]
; AMDGPU: 6:
-; AMDGPU-NEXT: unreachable
+; AMDGPU-NEXT: br i1 true, label [[TMP7:%.*]], label [[TMP8:%.*]]
; AMDGPU: 7:
+; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; AMDGPU-NEXT: br label [[TMP9]]
+; AMDGPU: 8:
+; AMDGPU-NEXT: unreachable
+; AMDGPU: 9:
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
@@ -87,22 +89,24 @@ define internal void @spmd_callees__debug(i1 %c) {
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10:[0-9]+]]
-; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[TMP2]], align 4
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2
-; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2
-; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
-; NVPTX: 3:
-; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
-; NVPTX-NEXT: br label [[TMP7:%.*]]
-; NVPTX: 4:
-; NVPTX-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
+; NVPTX-NEXT: [[TMP4:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2
+; NVPTX-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]]
; NVPTX: 5:
-; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
-; NVPTX-NEXT: br label [[TMP7]]
+; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; NVPTX-NEXT: br label [[TMP9:%.*]]
; NVPTX: 6:
-; NVPTX-NEXT: unreachable
+; NVPTX-NEXT: br i1 true, label [[TMP7:%.*]], label [[TMP8:%.*]]
; NVPTX: 7:
+; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; NVPTX-NEXT: br label [[TMP9]]
+; NVPTX: 8:
+; NVPTX-NEXT: unreachable
+; NVPTX: 9:
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
@@ -217,6 +221,8 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
; AMDGPU-NEXT: ret void
@@ -227,6 +233,8 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
; NVPTX-NEXT: ret void
@@ -340,6 +348,8 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
; AMDGPU-NEXT: ret void
@@ -350,6 +360,8 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
; NVPTX-NEXT: ret void
@@ -412,22 +424,24 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 {
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
-; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[TMP2]], align 4
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable
-; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable
-; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
-; AMDGPU: 3:
-; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
-; AMDGPU-NEXT: br label [[TMP7:%.*]]
-; AMDGPU: 4:
-; AMDGPU-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
+; AMDGPU-NEXT: [[TMP4:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable
+; AMDGPU-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]]
; AMDGPU: 5:
-; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
-; AMDGPU-NEXT: br label [[TMP7]]
+; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; AMDGPU-NEXT: br label [[TMP9:%.*]]
; AMDGPU: 6:
-; AMDGPU-NEXT: unreachable
+; AMDGPU-NEXT: br i1 true, label [[TMP7:%.*]], label [[TMP8:%.*]]
; AMDGPU: 7:
+; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; AMDGPU-NEXT: br label [[TMP9]]
+; AMDGPU: 8:
+; AMDGPU-NEXT: unreachable
+; AMDGPU: 9:
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
@@ -472,22 +486,24 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 {
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
-; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[TMP2]], align 4
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable
-; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable
-; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
-; NVPTX: 3:
-; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
-; NVPTX-NEXT: br label [[TMP7:%.*]]
-; NVPTX: 4:
-; NVPTX-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
+; NVPTX-NEXT: [[TMP4:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable
+; NVPTX-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]]
; NVPTX: 5:
-; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
-; NVPTX-NEXT: br label [[TMP7]]
+; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; NVPTX-NEXT: br label [[TMP9:%.*]]
; NVPTX: 6:
-; NVPTX-NEXT: unreachable
+; NVPTX-NEXT: br i1 true, label [[TMP7:%.*]], label [[TMP8:%.*]]
; NVPTX: 7:
+; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; NVPTX-NEXT: br label [[TMP9]]
+; NVPTX: 8:
+; NVPTX-NEXT: unreachable
+; NVPTX: 9:
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
@@ -530,9 +546,10 @@ define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., p
; AMDGPU-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR10]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; AMDGPU-NEXT: store ptr [[X]], ptr addrspace(5) [[TMP0]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
@@ -551,9 +568,10 @@ define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., p
; NVPTX-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR10]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; NVPTX-NEXT: store ptr [[X]], ptr addrspace(5) [[TMP0]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
@@ -620,10 +638,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR10]]
+; AMDGPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP5:%.*]] = load ptr, ptr addrspace(5) [[TMP4]], align 8
+; AMDGPU-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP6]]) #[[ATTR10]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
@@ -632,10 +653,13 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR10]]
+; NVPTX-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP5:%.*]] = load ptr, ptr addrspace(5) [[TMP4]], align 8
+; NVPTX-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20]]
+; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP6]]) #[[ATTR10]]
; NVPTX-NEXT: ret void
;
entry:
@@ -667,8 +691,10 @@ define weak ptx_kernel void @spmd_callees_metadata(ptr %fp) #0 {
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
-; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[TMP2]], align 4
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
@@ -685,8 +711,10 @@ define weak ptx_kernel void @spmd_callees_metadata(ptr %fp) #0 {
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
-; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[TMP2]], align 4
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
@@ -756,21 +784,23 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
-; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external
-; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
-; AMDGPU: 3:
-; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
-; AMDGPU-NEXT: br label [[TMP7:%.*]]
-; AMDGPU: 4:
-; AMDGPU-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[TMP2]], align 4
+; AMDGPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: [[TMP4:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external
+; AMDGPU-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]]
; AMDGPU: 5:
-; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
-; AMDGPU-NEXT: br label [[TMP7]]
+; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
+; AMDGPU-NEXT: br label [[TMP9:%.*]]
; AMDGPU: 6:
-; AMDGPU-NEXT: unreachable
+; AMDGPU-NEXT: br i1 true, label [[TMP7:%.*]], label [[TMP8:%.*]]
; AMDGPU: 7:
+; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
+; AMDGPU-NEXT: br label [[TMP9]]
+; AMDGPU: 8:
+; AMDGPU-NEXT: unreachable
+; AMDGPU: 9:
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
@@ -815,21 +845,23 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
-; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external
-; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
-; NVPTX: 3:
-; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
-; NVPTX-NEXT: br label [[TMP7:%.*]]
-; NVPTX: 4:
-; NVPTX-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
+; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[TMP2]], align 4
+; NVPTX-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTTHREADID_TEMP_]] to ptr addrspace(5)
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[TMP3]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: [[TMP4:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external
+; NVPTX-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]]
; NVPTX: 5:
-; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
-; NVPTX-NEXT: br label [[TMP7]]
+; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
+; NVPTX-NEXT: br label [[TMP9:%.*]]
; NVPTX: 6:
-; NVPTX-NEXT: unreachable
+; NVPTX-NEXT: br i1 true, label [[TMP7:%.*]], label [[TMP8:%.*]]
; NVPTX: 7:
+; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
+; NVPTX-NEXT: br label [[TMP9]]
+; NVPTX: 8:
+; NVPTX-NEXT: unreachable
+; NVPTX: 9:
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
diff --git a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
index 1cfce147ac81e..5b1470f985411 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
@@ -281,6 +281,8 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; CHECK-NEXT: call void @leaf() #[[ATTR8]]
; CHECK-NEXT: ret void
;
@@ -289,6 +291,8 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID__ADDR]] to ptr addrspace(5)
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DOTBOUND_TID__ADDR]] to ptr addrspace(5)
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR8]]
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
@@ -310,6 +314,9 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 {
; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR8]]
; CHECK-NEXT: ret void
@@ -321,6 +328,9 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 {
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLE-SPMDIZATION-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTADDR]] to ptr addrspace(5)
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[DOTADDR1]] to ptr addrspace(5)
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DOTZERO_ADDR]] to ptr addrspace(5)
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR8]]
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void